[linux.git] drivers/gpu/drm/panfrost/panfrost_job.c (Linux 6.14-rc3)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
3 /* Copyright 2019 Collabora ltd. */
4 #include <linux/delay.h>
5 #include <linux/interrupt.h>
6 #include <linux/io.h>
7 #include <linux/iopoll.h>
8 #include <linux/platform_device.h>
9 #include <linux/pm_runtime.h>
10 #include <linux/dma-resv.h>
11 #include <drm/gpu_scheduler.h>
12 #include <drm/panfrost_drm.h>
13
14 #include "panfrost_device.h"
15 #include "panfrost_devfreq.h"
16 #include "panfrost_job.h"
17 #include "panfrost_features.h"
18 #include "panfrost_issues.h"
19 #include "panfrost_gem.h"
20 #include "panfrost_regs.h"
21 #include "panfrost_gpu.h"
22 #include "panfrost_mmu.h"
23 #include "panfrost_dump.h"
24
25 #define JOB_TIMEOUT_MS 500
26
27 #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
28 #define job_read(dev, reg) readl(dev->iomem + (reg))
29
30 struct panfrost_queue_state {
31         struct drm_gpu_scheduler sched;
32         u64 fence_context;
33         u64 emit_seqno;
34 };
35
36 struct panfrost_job_slot {
37         struct panfrost_queue_state queue[NUM_JOB_SLOTS];
38         spinlock_t job_lock;
39         int irq;
40 };
41
42 static struct panfrost_job *
43 to_panfrost_job(struct drm_sched_job *sched_job)
44 {
45         return container_of(sched_job, struct panfrost_job, base);
46 }
47
48 struct panfrost_fence {
49         struct dma_fence base;
50         struct drm_device *dev;
51         /* panfrost seqno for signaled() test */
52         u64 seqno;
53         int queue;
54 };
55
56 static inline struct panfrost_fence *
57 to_panfrost_fence(struct dma_fence *fence)
58 {
59         return (struct panfrost_fence *)fence;
60 }
61
62 static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
63 {
64         return "panfrost";
65 }
66
67 static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
68 {
69         struct panfrost_fence *f = to_panfrost_fence(fence);
70
71         switch (f->queue) {
72         case 0:
73                 return "panfrost-js-0";
74         case 1:
75                 return "panfrost-js-1";
76         case 2:
77                 return "panfrost-js-2";
78         default:
79                 return NULL;
80         }
81 }
82
83 static const struct dma_fence_ops panfrost_fence_ops = {
84         .get_driver_name = panfrost_fence_get_driver_name,
85         .get_timeline_name = panfrost_fence_get_timeline_name,
86 };
87
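/* Allocate the done-fence for a job submitted to slot @js_num. Each slot has
 * its own dma_fence context with a monotonically increasing seqno, and all
 * fences share js->job_lock as their signalling lock.
 */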
88 static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
89 {
90         struct panfrost_fence *fence;
91         struct panfrost_job_slot *js = pfdev->js;
92
93         fence = kzalloc(sizeof(*fence), GFP_KERNEL);
94         if (!fence)
95                 return ERR_PTR(-ENOMEM);
96
97         fence->dev = pfdev->ddev;
98         fence->queue = js_num;
99         fence->seqno = ++js->queue[js_num].emit_seqno;
100         dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
101                        js->queue[js_num].fence_context, fence->seqno);
102
103         return &fence->base;
104 }
105
106 int panfrost_job_get_slot(struct panfrost_job *job)
107 {
108         /* JS0: fragment jobs.
109          * JS1: vertex/tiler jobs.
110          * JS2: compute jobs.
111          */
112         if (job->requirements & PANFROST_JD_REQ_FS)
113                 return 0;
114
115 /* Not exposed to userspace yet */
116 #if 0
117         if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
118                 if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
119                     (job->pfdev->features.nr_core_groups == 2))
120                         return 2;
121                 if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
122                         return 2;
123         }
124 #endif
125         return 1;
126 }
127
128 static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
129                                         u32 requirements,
130                                         int js)
131 {
132         u64 affinity;
133
134         /*
135          * Use all cores for now.
136          * Eventually we may need to support tiler-only jobs and h/w with
137          * multiple (2) coherent core groups.
138          */
139         affinity = pfdev->features.shader_present;
140
141         job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
142         job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
143 }
144
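/* On HW with jobchain disambiguation, alternate the job chain flag with the
 * fence seqno so a specific job (rather than its successor on the same slot)
 * can later be targeted with JS_COMMAND_HARD_STOP_0/1 (see panfrost_job_close()).
 */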
145 static u32
146 panfrost_get_job_chain_flag(const struct panfrost_job *job)
147 {
148         struct panfrost_fence *f = to_panfrost_fence(job->done_fence);
149
150         if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
151                 return 0;
152
153         return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
154 }
155
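/* Remove the oldest job from the slot's two-entry software queue (one running
 * job plus one queued in the _NEXT registers), folding its profiling data into
 * the engine usage stats. Called with js->job_lock held.
 */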
156 static struct panfrost_job *
157 panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
158 {
159         struct panfrost_job *job = pfdev->jobs[slot][0];
160
161         WARN_ON(!job);
162
163         if (job->is_profiled && job->engine_usage) {
164                 job->engine_usage->elapsed_ns[slot] +=
165                         ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
166                 job->engine_usage->cycles[slot] +=
167                         panfrost_cycle_counter_read(pfdev) - job->start_cycles;
168         }
169
170         if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT || job->is_profiled)
171                 panfrost_cycle_counter_put(pfdev);
172
173         pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
174         pfdev->jobs[slot][1] = NULL;
175
176         return job;
177 }
178
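/* Add @job to the slot's software queue and return the subslot (0 or 1) it
 * landed in. With jobchain disambiguation, the two queued jobs must carry
 * different job chain flags. Called with js->job_lock held.
 */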
179 static unsigned int
180 panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
181                      struct panfrost_job *job)
182 {
183         if (WARN_ON(!job))
184                 return 0;
185
186         if (!pfdev->jobs[slot][0]) {
187                 pfdev->jobs[slot][0] = job;
188                 return 0;
189         }
190
191         WARN_ON(pfdev->jobs[slot][1]);
192         pfdev->jobs[slot][1] = job;
193         WARN_ON(panfrost_get_job_chain_flag(job) ==
194                 panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
195         return 1;
196 }
197
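/* Program the slot's _NEXT registers (job chain head, affinity, config and
 * flush ID), add the job to the slot's software queue and, unless a reset is
 * pending, kick it with JS_COMMAND_START.
 */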
198 static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
199 {
200         struct panfrost_device *pfdev = job->pfdev;
201         unsigned int subslot;
202         u32 cfg;
203         u64 jc_head = job->jc;
204         int ret;
205
206         panfrost_devfreq_record_busy(&pfdev->pfdevfreq);
207
208         ret = pm_runtime_get_sync(pfdev->dev);
209         if (ret < 0)
210                 return;
211
212         if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
213                 return;
214         }
215
216         cfg = panfrost_mmu_as_get(pfdev, job->mmu);
217
218         job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
219         job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));
220
221         panfrost_job_write_affinity(pfdev, job->requirements, js);
222
223         /* Start MMU, medium priority, cache clean/flush on end, cache
224          * clean/flush on start. */
225         cfg |= JS_CONFIG_THREAD_PRI(8) |
226                 JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
227                 JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
228                 panfrost_get_job_chain_flag(job);
229
230         if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
231                 cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
232
233         if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
234                 cfg |= JS_CONFIG_START_MMU;
235
236         job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
237
238         if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
239                 job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
240
241         /* GO ! */
242
243         spin_lock(&pfdev->js->job_lock);
244         subslot = panfrost_enqueue_job(pfdev, js, job);
245         /* Don't queue the job if a reset is in progress */
246         if (!atomic_read(&pfdev->reset.pending)) {
247                 job->is_profiled = pfdev->profile_mode;
248
249                 if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
250                     job->is_profiled)
251                         panfrost_cycle_counter_get(pfdev);
252
253                 if (job->is_profiled) {
254                         job->start_time = ktime_get();
255                         job->start_cycles = panfrost_cycle_counter_read(pfdev);
256                 }
257
258                 job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
259                 dev_dbg(pfdev->dev,
260                         "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
261                         job, js, subslot, jc_head, cfg & 0xf);
262         }
263         spin_unlock(&pfdev->js->job_lock);
264 }
265
266 static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
267                                           int bo_count,
268                                           struct drm_sched_job *job)
269 {
270         int i, ret;
271
272         for (i = 0; i < bo_count; i++) {
273                 ret = dma_resv_reserve_fences(bos[i]->resv, 1);
274                 if (ret)
275                         return ret;
276
277                 /* panfrost always uses write mode in its current uapi */
278                 ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
279                                                               true);
280                 if (ret)
281                         return ret;
282         }
283
284         return 0;
285 }
286
287 static void panfrost_attach_object_fences(struct drm_gem_object **bos,
288                                           int bo_count,
289                                           struct dma_fence *fence)
290 {
291         int i;
292
293         for (i = 0; i < bo_count; i++)
294                 dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
295 }
296
297 int panfrost_job_push(struct panfrost_job *job)
298 {
299         struct panfrost_device *pfdev = job->pfdev;
300         struct ww_acquire_ctx acquire_ctx;
301         int ret = 0;
302
303         ret = drm_gem_lock_reservations(job->bos, job->bo_count,
304                                             &acquire_ctx);
305         if (ret)
306                 return ret;
307
308         mutex_lock(&pfdev->sched_lock);
309         drm_sched_job_arm(&job->base);
310
311         job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
312
313         ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
314                                              &job->base);
315         if (ret) {
316                 mutex_unlock(&pfdev->sched_lock);
317                 goto unlock;
318         }
319
320         kref_get(&job->refcount); /* put by scheduler job completion */
321
322         drm_sched_entity_push_job(&job->base);
323
324         mutex_unlock(&pfdev->sched_lock);
325
326         panfrost_attach_object_fences(job->bos, job->bo_count,
327                                       job->render_done_fence);
328
329 unlock:
330         drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
331
332         return ret;
333 }
334
335 static void panfrost_job_cleanup(struct kref *ref)
336 {
337         struct panfrost_job *job = container_of(ref, struct panfrost_job,
338                                                 refcount);
339         unsigned int i;
340
341         dma_fence_put(job->done_fence);
342         dma_fence_put(job->render_done_fence);
343
344         if (job->mappings) {
345                 for (i = 0; i < job->bo_count; i++) {
346                         if (!job->mappings[i])
347                                 break;
348
349                         atomic_dec(&job->mappings[i]->obj->gpu_usecount);
350                         panfrost_gem_mapping_put(job->mappings[i]);
351                 }
352                 kvfree(job->mappings);
353         }
354
355         if (job->bos) {
356                 for (i = 0; i < job->bo_count; i++)
357                         drm_gem_object_put(job->bos[i]);
358
359                 kvfree(job->bos);
360         }
361
362         kfree(job);
363 }
364
365 void panfrost_job_put(struct panfrost_job *job)
366 {
367         kref_put(&job->refcount, panfrost_job_cleanup);
368 }
369
370 static void panfrost_job_free(struct drm_sched_job *sched_job)
371 {
372         struct panfrost_job *job = to_panfrost_job(sched_job);
373
374         drm_sched_job_cleanup(sched_job);
375
376         panfrost_job_put(job);
377 }
378
379 static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
380 {
381         struct panfrost_job *job = to_panfrost_job(sched_job);
382         struct panfrost_device *pfdev = job->pfdev;
383         int slot = panfrost_job_get_slot(job);
384         struct dma_fence *fence = NULL;
385
386         if (unlikely(job->base.s_fence->finished.error))
387                 return NULL;
388
389         /* Nothing to execute: can happen if the job has finished while
390          * we were resetting the GPU.
391          */
392         if (!job->jc)
393                 return NULL;
394
395         fence = panfrost_fence_create(pfdev, slot);
396         if (IS_ERR(fence))
397                 return fence;
398
399         if (job->done_fence)
400                 dma_fence_put(job->done_fence);
401         job->done_fence = dma_fence_get(fence);
402
403         panfrost_job_hw_submit(job, slot);
404
405         return fence;
406 }
407
408 void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
409 {
410         int j;
411         u32 irq_mask = 0;
412
413         clear_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
414
415         for (j = 0; j < NUM_JOB_SLOTS; j++) {
416                 irq_mask |= MK_JS_MASK(j);
417         }
418
419         job_write(pfdev, JOB_INT_CLEAR, irq_mask);
420         job_write(pfdev, JOB_INT_MASK, irq_mask);
421 }
422
423 void panfrost_job_suspend_irq(struct panfrost_device *pfdev)
424 {
425         set_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended);
426
427         job_write(pfdev, JOB_INT_MASK, 0);
428         synchronize_irq(pfdev->js->irq);
429 }
430
431 static void panfrost_job_handle_err(struct panfrost_device *pfdev,
432                                     struct panfrost_job *job,
433                                     unsigned int js)
434 {
435         u32 js_status = job_read(pfdev, JS_STATUS(js));
436         const char *exception_name = panfrost_exception_name(js_status);
437         bool signal_fence = true;
438
439         if (!panfrost_exception_is_fault(js_status)) {
440                 dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
441                         js, exception_name,
442                         job_read(pfdev, JS_HEAD_LO(js)),
443                         job_read(pfdev, JS_TAIL_LO(js)));
444         } else {
445                 dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
446                         js, exception_name,
447                         job_read(pfdev, JS_HEAD_LO(js)),
448                         job_read(pfdev, JS_TAIL_LO(js)));
449         }
450
451         if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
452                 /* Update the job head so we can resume */
453                 job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
454                           ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);
455
456                 /* The job will be resumed, don't signal the fence */
457                 signal_fence = false;
458         } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
459                 /* Job has been hard-stopped, flag it as canceled */
460                 dma_fence_set_error(job->done_fence, -ECANCELED);
461                 job->jc = 0;
462         } else if (panfrost_exception_is_fault(js_status)) {
463                 /* We might want to provide finer-grained error code based on
464                  * the exception type, but unconditionally setting to EINVAL
465                  * is good enough for now.
466                  */
467                 dma_fence_set_error(job->done_fence, -EINVAL);
468                 job->jc = 0;
469         }
470
471         panfrost_mmu_as_put(pfdev, job->mmu);
472         panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
473
474         if (signal_fence)
475                 dma_fence_signal_locked(job->done_fence);
476
477         pm_runtime_put_autosuspend(pfdev->dev);
478
479         if (panfrost_exception_needs_reset(pfdev, js_status)) {
480                 atomic_set(&pfdev->reset.pending, 1);
481                 drm_sched_fault(&pfdev->js->queue[js].sched);
482         }
483 }
484
485 static void panfrost_job_handle_done(struct panfrost_device *pfdev,
486                                      struct panfrost_job *job)
487 {
488         /* Set ->jc to 0 to avoid re-submitting an already finished job (can
489          * happen when we receive the DONE interrupt while doing a GPU reset).
490          */
491         job->jc = 0;
492         panfrost_mmu_as_put(pfdev, job->mmu);
493         panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
494
495         dma_fence_signal_locked(job->done_fence);
496         pm_runtime_put_autosuspend(pfdev->dev);
497 }
498
499 static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
500 {
501         struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
502         struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
503         u32 js_state = 0, js_events = 0;
504         unsigned int i, j;
505
506         /* First we collect all failed/done jobs. */
507         while (status) {
508                 u32 js_state_mask = 0;
509
510                 for (j = 0; j < NUM_JOB_SLOTS; j++) {
511                         if (status & MK_JS_MASK(j))
512                                 js_state_mask |= MK_JS_MASK(j);
513
514                         if (status & JOB_INT_MASK_DONE(j)) {
515                                 if (done[j][0])
516                                         done[j][1] = panfrost_dequeue_job(pfdev, j);
517                                 else
518                                         done[j][0] = panfrost_dequeue_job(pfdev, j);
519                         }
520
521                         if (status & JOB_INT_MASK_ERR(j)) {
522                                 /* Cancel the next submission. Will be submitted
523                                  * after we're done handling this failure if
524                                  * there's no reset pending.
525                                  */
526                                 job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
527                                 failed[j] = panfrost_dequeue_job(pfdev, j);
528                         }
529                 }
530
531                 /* JS_STATE is sampled when JOB_INT_CLEAR is written.
532                  * For each BIT(slot) or BIT(slot + 16) bit written to
533                  * JOB_INT_CLEAR, the corresponding bits in JS_STATE
534                  * (BIT(slot) and BIT(slot + 16)) are updated, but this
535                  * is racy. If we only have one job done at the time we
536                  * read JOB_INT_RAWSTAT but the second job fails before we
537                  * clear the status, we end up with a status containing
538                  * only the DONE bit and consider both jobs as DONE since
539                  * JS_STATE reports both NEXT and CURRENT as inactive.
540                  * To prevent that, let's repeat these clear+read steps
541                  * until status is 0.
542                  */
543                 job_write(pfdev, JOB_INT_CLEAR, status);
544                 js_state &= ~js_state_mask;
545                 js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
546                 js_events |= status;
547                 status = job_read(pfdev, JOB_INT_RAWSTAT);
548         }
549
550         /* Then we handle the dequeued jobs. */
551         for (j = 0; j < NUM_JOB_SLOTS; j++) {
552                 if (!(js_events & MK_JS_MASK(j)))
553                         continue;
554
555                 if (failed[j]) {
556                         panfrost_job_handle_err(pfdev, failed[j], j);
557                 } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
558                         /* When the current job doesn't fail, the JM dequeues
559                          * the next job without waiting for an ACK, this means
560                          * we can have 2 jobs dequeued and only catch the
561                          * interrupt when the second one is done. If both slots
562                          * are inactive, but one job remains in pfdev->jobs[j],
563                          * consider it done. Of course that doesn't apply if a
564                          * failure happened since we cancelled execution of the
565                          * job in _NEXT (see above).
566                          */
567                         if (WARN_ON(!done[j][0]))
568                                 done[j][0] = panfrost_dequeue_job(pfdev, j);
569                         else
570                                 done[j][1] = panfrost_dequeue_job(pfdev, j);
571                 }
572
573                 for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
574                         panfrost_job_handle_done(pfdev, done[j][i]);
575         }
576
577         /* And finally we requeue jobs that were waiting in the second slot
578          * and were stopped because we detected a failure on the first slot.
579          */
580         for (j = 0; j < NUM_JOB_SLOTS; j++) {
581                 if (!(js_events & MK_JS_MASK(j)))
582                         continue;
583
584                 if (!failed[j] || !pfdev->jobs[j][0])
585                         continue;
586
587                 if (pfdev->jobs[j][0]->jc == 0) {
588                         /* The job was cancelled, signal the fence now */
589                         struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);
590
591                         dma_fence_set_error(canceled->done_fence, -ECANCELED);
592                         panfrost_job_handle_done(pfdev, canceled);
593                 } else if (!atomic_read(&pfdev->reset.pending)) {
594                         /* Requeue the job we removed if no reset is pending */
595                         job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
596                 }
597         }
598 }
599
600 static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
601 {
602         u32 status = job_read(pfdev, JOB_INT_RAWSTAT);
603
604         while (status) {
605                 pm_runtime_mark_last_busy(pfdev->dev);
606
607                 spin_lock(&pfdev->js->job_lock);
608                 panfrost_job_handle_irq(pfdev, status);
609                 spin_unlock(&pfdev->js->job_lock);
610                 status = job_read(pfdev, JOB_INT_RAWSTAT);
611         }
612 }
613
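/* Helper for the soft-stop poll loop in panfrost_reset(): return the slots in
 * @js_state that are still active, after dropping from *js_state_mask any slot
 * with a pending job interrupt (those are handled before the reset).
 */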
614 static u32 panfrost_active_slots(struct panfrost_device *pfdev,
615                                  u32 *js_state_mask, u32 js_state)
616 {
617         u32 rawstat;
618
619         if (!(js_state & *js_state_mask))
620                 return 0;
621
622         rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
623         if (rawstat) {
624                 unsigned int i;
625
626                 for (i = 0; i < NUM_JOB_SLOTS; i++) {
627                         if (rawstat & MK_JS_MASK(i))
628                                 *js_state_mask &= ~MK_JS_MASK(i);
629                 }
630         }
631
632         return js_state & *js_state_mask;
633 }
634
635 static void
636 panfrost_reset(struct panfrost_device *pfdev,
637                struct drm_sched_job *bad)
638 {
639         u32 js_state, js_state_mask = 0xffffffff;
640         unsigned int i, j;
641         bool cookie;
642         int ret;
643
644         if (!atomic_read(&pfdev->reset.pending))
645                 return;
646
647         /* Stop the schedulers.
648          *
649          * FIXME: We temporarily get out of the dma_fence_signalling section
650          * because the cleanup path generates lockdep splats when taking locks
651          * to release job resources. We should rework the code to follow this
652          * pattern:
653          *
654          *      try_lock
655          *      if (locked)
656          *              release
657          *      else
658          *              schedule_work_to_release_later
659          */
660         for (i = 0; i < NUM_JOB_SLOTS; i++)
661                 drm_sched_stop(&pfdev->js->queue[i].sched, bad);
662
663         cookie = dma_fence_begin_signalling();
664
665         if (bad)
666                 drm_sched_increase_karma(bad);
667
668         /* Mask job interrupts and synchronize to make sure we won't be
669          * interrupted during our reset.
670          */
671         job_write(pfdev, JOB_INT_MASK, 0);
672         synchronize_irq(pfdev->js->irq);
673
674         for (i = 0; i < NUM_JOB_SLOTS; i++) {
675                 /* Cancel the next job and soft-stop the running job. */
676                 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
677                 job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
678         }
679
680         /* Wait at most 10ms for soft-stops to complete */
681         ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
682                                  !panfrost_active_slots(pfdev, &js_state_mask, js_state),
683                                  10, 10000);
684
685         if (ret)
686                 dev_err(pfdev->dev, "Soft-stop failed\n");
687
688         /* Handle the remaining interrupts before we reset. */
689         panfrost_job_handle_irqs(pfdev);
690
691         /* Remaining interrupts have been handled, but we might still have
692          * stuck jobs. Let's make sure the PM counters stay balanced by
693          * manually calling pm_runtime_put_noidle() and
694          * panfrost_devfreq_record_idle() for each stuck job.
695          * Let's also make sure the cycle-counting register's refcnt is
696          * kept balanced to prevent it from running forever.
697          */
698         spin_lock(&pfdev->js->job_lock);
699         for (i = 0; i < NUM_JOB_SLOTS; i++) {
700                 for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
701                         if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
702                             pfdev->jobs[i][j]->is_profiled)
703                                 panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
704                         pm_runtime_put_noidle(pfdev->dev);
705                         panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
706                 }
707         }
708         memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
709         spin_unlock(&pfdev->js->job_lock);
710
711         /* Proceed with reset now. */
712         panfrost_device_reset(pfdev);
713
714         /* panfrost_device_reset() unmasks job interrupts, but we want to
715          * keep them masked a bit longer.
716          */
717         job_write(pfdev, JOB_INT_MASK, 0);
718
719         /* GPU has been reset, we can clear the reset pending bit. */
720         atomic_set(&pfdev->reset.pending, 0);
721
722         /* Now resubmit jobs that were previously queued but didn't have a
723          * chance to finish.
724          * FIXME: We temporarily get out of the DMA fence signalling section
725          * while resubmitting jobs because the job submission logic will
726          * allocate memory with the GFP_KERNEL flag which can trigger memory
727          * reclaim and exposes a lock ordering issue.
728          */
729         dma_fence_end_signalling(cookie);
730         for (i = 0; i < NUM_JOB_SLOTS; i++)
731                 drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
732         cookie = dma_fence_begin_signalling();
733
734         /* Restart the schedulers */
735         for (i = 0; i < NUM_JOB_SLOTS; i++)
736                 drm_sched_start(&pfdev->js->queue[i].sched, 0);
737
738         /* Re-enable job interrupts now that everything has been restarted. */
739         job_write(pfdev, JOB_INT_MASK,
740                   GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
741                   GENMASK(NUM_JOB_SLOTS - 1, 0));
742
743         dma_fence_end_signalling(cookie);
744 }
745
746 static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
747                                                      *sched_job)
748 {
749         struct panfrost_job *job = to_panfrost_job(sched_job);
750         struct panfrost_device *pfdev = job->pfdev;
751         int js = panfrost_job_get_slot(job);
752
753         /*
754          * If the GPU managed to complete this job's fence, the timeout is
755          * spurious. Bail out.
756          */
757         if (dma_fence_is_signaled(job->done_fence))
758                 return DRM_GPU_SCHED_STAT_NOMINAL;
759
760         /*
761          * The Panfrost IRQ handler may take a long time to process an interrupt
762          * if another IRQ handler is hogging the processing.
763          * For example, the HDMI encoder driver might be stuck in its IRQ
764          * handler for a significant time in the case of a bad cable connection.
765          * In order to catch such cases and not report spurious Panfrost
766          * job timeouts, synchronize the IRQ handler and re-check the fence
767          * status.
768          */
769         synchronize_irq(pfdev->js->irq);
770
771         if (dma_fence_is_signaled(job->done_fence)) {
772                 dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
773                 return DRM_GPU_SCHED_STAT_NOMINAL;
774         }
775
776         dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
777                 js,
778                 job_read(pfdev, JS_CONFIG(js)),
779                 job_read(pfdev, JS_STATUS(js)),
780                 job_read(pfdev, JS_HEAD_LO(js)),
781                 job_read(pfdev, JS_TAIL_LO(js)),
782                 sched_job);
783
784         panfrost_core_dump(job);
785
786         atomic_set(&pfdev->reset.pending, 1);
787         panfrost_reset(pfdev, sched_job);
788
789         return DRM_GPU_SCHED_STAT_NOMINAL;
790 }
791
792 static void panfrost_reset_work(struct work_struct *work)
793 {
794         struct panfrost_device *pfdev;
795
796         pfdev = container_of(work, struct panfrost_device, reset.work);
797         panfrost_reset(pfdev, NULL);
798 }
799
800 static const struct drm_sched_backend_ops panfrost_sched_ops = {
801         .run_job = panfrost_job_run,
802         .timedout_job = panfrost_job_timedout,
803         .free_job = panfrost_job_free
804 };
805
806 static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
807 {
808         struct panfrost_device *pfdev = data;
809
810         panfrost_job_handle_irqs(pfdev);
811
812         /* Enable interrupts only if we're not about to get suspended */
813         if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
814                 job_write(pfdev, JOB_INT_MASK,
815                           GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
816                           GENMASK(NUM_JOB_SLOTS - 1, 0));
817
818         return IRQ_HANDLED;
819 }
820
821 static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
822 {
823         struct panfrost_device *pfdev = data;
824         u32 status;
825
826         if (test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
827                 return IRQ_NONE;
828
829         status = job_read(pfdev, JOB_INT_STAT);
830         if (!status)
831                 return IRQ_NONE;
832
833         job_write(pfdev, JOB_INT_MASK, 0);
834         return IRQ_WAKE_THREAD;
835 }
836
837 int panfrost_job_init(struct panfrost_device *pfdev)
838 {
839         struct panfrost_job_slot *js;
840         unsigned int nentries = 2;
841         int ret, j;
842
843         /* All GPUs have two entries per queue, but without jobchain
844          * disambiguation, stopping the right job in the close path is tricky,
845          * so let's just advertise one entry in that case.
846          */
847         if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
848                 nentries = 1;
849
850         pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
851         if (!js)
852                 return -ENOMEM;
853
854         INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
855         spin_lock_init(&js->job_lock);
856
857         js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
858         if (js->irq < 0)
859                 return js->irq;
860
861         ret = devm_request_threaded_irq(pfdev->dev, js->irq,
862                                         panfrost_job_irq_handler,
863                                         panfrost_job_irq_handler_thread,
864                                         IRQF_SHARED, KBUILD_MODNAME "-job",
865                                         pfdev);
866         if (ret) {
867                 dev_err(pfdev->dev, "failed to request job irq");
868                 return ret;
869         }
870
871         pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
872         if (!pfdev->reset.wq)
873                 return -ENOMEM;
874
875         for (j = 0; j < NUM_JOB_SLOTS; j++) {
876                 js->queue[j].fence_context = dma_fence_context_alloc(1);
877
878                 ret = drm_sched_init(&js->queue[j].sched,
879                                      &panfrost_sched_ops, NULL,
880                                      DRM_SCHED_PRIORITY_COUNT,
881                                      nentries, 0,
882                                      msecs_to_jiffies(JOB_TIMEOUT_MS),
883                                      pfdev->reset.wq,
884                                      NULL, "pan_js", pfdev->dev);
885                 if (ret) {
886                         dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
887                         goto err_sched;
888                 }
889         }
890
891         panfrost_job_enable_interrupts(pfdev);
892
893         return 0;
894
895 err_sched:
896         for (j--; j >= 0; j--)
897                 drm_sched_fini(&js->queue[j].sched);
898
899         destroy_workqueue(pfdev->reset.wq);
900         return ret;
901 }
902
903 void panfrost_job_fini(struct panfrost_device *pfdev)
904 {
905         struct panfrost_job_slot *js = pfdev->js;
906         int j;
907
908         job_write(pfdev, JOB_INT_MASK, 0);
909
910         for (j = 0; j < NUM_JOB_SLOTS; j++) {
911                 drm_sched_fini(&js->queue[j].sched);
912         }
913
914         cancel_work_sync(&pfdev->reset.work);
915         destroy_workqueue(pfdev->reset.wq);
916 }
917
918 int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
919 {
920         struct panfrost_device *pfdev = panfrost_priv->pfdev;
921         struct panfrost_job_slot *js = pfdev->js;
922         struct drm_gpu_scheduler *sched;
923         int ret, i;
924
925         for (i = 0; i < NUM_JOB_SLOTS; i++) {
926                 sched = &js->queue[i].sched;
927                 ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
928                                             DRM_SCHED_PRIORITY_NORMAL, &sched,
929                                             1, NULL);
930                 if (WARN_ON(ret))
931                         return ret;
932         }
933         return 0;
934 }
935
936 void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
937 {
938         struct panfrost_device *pfdev = panfrost_priv->pfdev;
939         int i;
940
941         for (i = 0; i < NUM_JOB_SLOTS; i++)
942                 drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
943
944         /* Kill in-flight jobs */
945         spin_lock(&pfdev->js->job_lock);
946         for (i = 0; i < NUM_JOB_SLOTS; i++) {
947                 struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
948                 int j;
949
950                 for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
951                         struct panfrost_job *job = pfdev->jobs[i][j];
952                         u32 cmd;
953
954                         if (!job || job->base.entity != entity)
955                                 continue;
956
957                         if (j == 1) {
958                                 /* Try to cancel the job before it starts */
959                                 job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
960                                 /* Reset the job head so it doesn't get restarted if
961                                  * the job in the first slot failed.
962                                  */
963                                 job->jc = 0;
964                         }
965
966                         if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
967                                 cmd = panfrost_get_job_chain_flag(job) ?
968                                       JS_COMMAND_HARD_STOP_1 :
969                                       JS_COMMAND_HARD_STOP_0;
970                         } else {
971                                 cmd = JS_COMMAND_HARD_STOP;
972                         }
973
974                         job_write(pfdev, JS_COMMAND(i), cmd);
975
976                         /* Jobs can outlive their file context */
977                         job->engine_usage = NULL;
978                 }
979         }
980         spin_unlock(&pfdev->js->job_lock);
981 }
982
983 int panfrost_job_is_idle(struct panfrost_device *pfdev)
984 {
985         struct panfrost_job_slot *js = pfdev->js;
986         int i;
987
988         for (i = 0; i < NUM_JOB_SLOTS; i++) {
989                 /* If there are any jobs in the HW queue, we're not idle */
990                 if (atomic_read(&js->queue[i].sched.credit_count))
991                         return false;
992         }
993
994         return true;
995 }