drivers/gpu/drm/scheduler/sched_main.c
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 /**
25  * DOC: Overview
26  *
27  * The GPU scheduler provides entities which allow userspace to push jobs
28  * into software queues which are then scheduled on a hardware run queue.
29  * The software queues have a priority among them. The scheduler selects an
30  * entity from the run queue in FIFO order. The scheduler provides dependency
31  * handling among jobs. The driver is supposed to provide callback functions
32  * for backend operations to the scheduler, like submitting a job to the
33  * hardware run queue, returning the dependencies of a job, etc.
34  *
35  * The organisation of the scheduler is the following:
36  *
37  * 1. Each hw run queue has one scheduler
38  * 2. Each scheduler has multiple run queues with different priorities
39  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40  * 3. Each scheduler run queue has a queue of entities to schedule
41  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42  *    the hardware.
43  *
44  * The jobs in an entity are always scheduled in the order in which they were pushed.
45  */
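
/*
 * Illustrative sketch: the backend callbacks a driver provides to the
 * scheduler. The foo_* names are hypothetical; only struct
 * drm_sched_backend_ops and its fields come from <drm/gpu_scheduler.h>
 * (included below), and drm_sched_job_cleanup() is defined later in this
 * file.
 */
static struct dma_fence *foo_sched_dependency(struct drm_sched_job *sched_job,
					      struct drm_sched_entity *s_entity)
{
	/* Return the next unsignaled fence this job depends on, or NULL. */
	return NULL;
}

static struct dma_fence *foo_sched_run_job(struct drm_sched_job *sched_job)
{
	/* Push the job to the hardware run queue and return its HW fence. */
	return NULL;
}

static void foo_sched_timedout_job(struct drm_sched_job *sched_job)
{
	/* Kick off the driver's reset path; see the recovery sketch below. */
}

static void foo_sched_free_job(struct drm_sched_job *sched_job)
{
	/* Release scheduler bookkeeping, then the driver's own job struct. */
	drm_sched_job_cleanup(sched_job);
}

static const struct drm_sched_backend_ops foo_sched_ops = {
	.dependency	= foo_sched_dependency,
	.run_job	= foo_sched_run_job,
	.timedout_job	= foo_sched_timedout_job,
	.free_job	= foo_sched_free_job,
};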
46
47 #include <linux/kthread.h>
48 #include <linux/wait.h>
49 #include <linux/sched.h>
50 #include <linux/completion.h>
51 #include <uapi/linux/sched/types.h>
52
53 #include <drm/drm_print.h>
54 #include <drm/gpu_scheduler.h>
55 #include <drm/spsc_queue.h>
56
57 #define CREATE_TRACE_POINTS
58 #include "gpu_scheduler_trace.h"
59
60 #define to_drm_sched_job(sched_job)             \
61                 container_of((sched_job), struct drm_sched_job, queue_node)
62
63 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
64
65 /**
66  * drm_sched_rq_init - initialize a given run queue struct
67  *
68  * @sched: scheduler instance the run queue belongs to
 * @rq: scheduler run queue
69  *
70  * Initializes a scheduler runqueue.
71  */
72 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
73                               struct drm_sched_rq *rq)
74 {
75         spin_lock_init(&rq->lock);
76         INIT_LIST_HEAD(&rq->entities);
77         rq->current_entity = NULL;
78         rq->sched = sched;
79 }
80
81 /**
82  * drm_sched_rq_add_entity - add an entity
83  *
84  * @rq: scheduler run queue
85  * @entity: scheduler entity
86  *
87  * Adds a scheduler entity to the run queue.
88  */
89 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
90                              struct drm_sched_entity *entity)
91 {
92         if (!list_empty(&entity->list))
93                 return;
94         spin_lock(&rq->lock);
95         atomic_inc(&rq->sched->score);
96         list_add_tail(&entity->list, &rq->entities);
97         spin_unlock(&rq->lock);
98 }
99
100 /**
101  * drm_sched_rq_remove_entity - remove an entity
102  *
103  * @rq: scheduler run queue
104  * @entity: scheduler entity
105  *
106  * Removes a scheduler entity from the run queue.
107  */
108 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
109                                 struct drm_sched_entity *entity)
110 {
111         if (list_empty(&entity->list))
112                 return;
113         spin_lock(&rq->lock);
114         atomic_dec(&rq->sched->score);
115         list_del_init(&entity->list);
116         if (rq->current_entity == entity)
117                 rq->current_entity = NULL;
118         spin_unlock(&rq->lock);
119 }
120
121 /**
122  * drm_sched_rq_select_entity - Select an entity which could provide a job to run
123  *
124  * @rq: scheduler run queue to check.
125  *
126  * Try to find a ready entity; returns NULL if none is found.
127  */
128 static struct drm_sched_entity *
129 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
130 {
131         struct drm_sched_entity *entity;
132
133         spin_lock(&rq->lock);
134
135         entity = rq->current_entity;
136         if (entity) {
137                 list_for_each_entry_continue(entity, &rq->entities, list) {
138                         if (drm_sched_entity_is_ready(entity)) {
139                                 rq->current_entity = entity;
140                                 reinit_completion(&entity->entity_idle);
141                                 spin_unlock(&rq->lock);
142                                 return entity;
143                         }
144                 }
145         }
146
147         list_for_each_entry(entity, &rq->entities, list) {
148
149                 if (drm_sched_entity_is_ready(entity)) {
150                         rq->current_entity = entity;
151                         reinit_completion(&entity->entity_idle);
152                         spin_unlock(&rq->lock);
153                         return entity;
154                 }
155
156                 if (entity == rq->current_entity)
157                         break;
158         }
159
160         spin_unlock(&rq->lock);
161
162         return NULL;
163 }
164
165 /**
166  * drm_sched_dependency_optimized - check if a dependency can be optimized
167  *
168  * @fence: the dependency fence
169  * @entity: the entity which depends on the above fence
170  *
171  * Returns true if the dependency can be optimized and false otherwise
172  */
173 bool drm_sched_dependency_optimized(struct dma_fence *fence,
174                                     struct drm_sched_entity *entity)
175 {
176         struct drm_gpu_scheduler *sched = entity->rq->sched;
177         struct drm_sched_fence *s_fence;
178
179         if (!fence || dma_fence_is_signaled(fence))
180                 return false;
181         if (fence->context == entity->fence_context)
182                 return true;
183         s_fence = to_drm_sched_fence(fence);
184         if (s_fence && s_fence->sched == sched)
185                 return true;
186
187         return false;
188 }
189 EXPORT_SYMBOL(drm_sched_dependency_optimized);
190
191 /**
192  * drm_sched_start_timeout - start timeout for reset worker
193  *
194  * @sched: scheduler instance to start the worker for
195  *
196  * Start the timeout for the given scheduler.
197  */
198 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
199 {
200         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
201             !list_empty(&sched->ring_mirror_list))
202                 schedule_delayed_work(&sched->work_tdr, sched->timeout);
203 }
204
205 /**
206  * drm_sched_fault - immediately start timeout handler
207  *
208  * @sched: scheduler where the timeout handling should be started.
209  *
210  * Start timeout handling immediately when the driver detects a hardware fault.
211  */
212 void drm_sched_fault(struct drm_gpu_scheduler *sched)
213 {
214         mod_delayed_work(system_wq, &sched->work_tdr, 0);
215 }
216 EXPORT_SYMBOL(drm_sched_fault);
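
/*
 * Illustrative sketch of using drm_sched_fault() - foo_handle_gpu_fault() is
 * hypothetical driver code, not part of this file: when the driver detects a
 * hardware fault (e.g. in its interrupt handler) it can skip the remaining
 * TDR wait and start timeout handling right away.
 */
static void foo_handle_gpu_fault(struct drm_gpu_scheduler *sched)
{
	/* ... read and log the driver-specific fault status here ... */

	drm_sched_fault(sched);
}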
217
218 /**
219  * drm_sched_suspend_timeout - Suspend scheduler job timeout
220  *
221  * @sched: scheduler instance for which to suspend the timeout
222  *
223  * Suspend the delayed work timeout for the scheduler. This is done by
224  * modifying the delayed work timeout to an arbitrarily large value,
225  * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
226  * called from an IRQ context.
227  *
228  * Returns the timeout remaining
229  *
230  */
231 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
232 {
233         unsigned long sched_timeout, now = jiffies;
234
235         sched_timeout = sched->work_tdr.timer.expires;
236
237         /*
238          * Modify the timeout to an arbitrarily large value. This also prevents
239          * the timeout from being restarted when new submissions arrive.
240          */
241         if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
242                         && time_after(sched_timeout, now))
243                 return sched_timeout - now;
244         else
245                 return sched->timeout;
246 }
247 EXPORT_SYMBOL(drm_sched_suspend_timeout);
248
249 /**
250  * drm_sched_resume_timeout - Resume scheduler job timeout
251  *
252  * @sched: scheduler instance for which to resume the timeout
253  * @remaining: remaining timeout
254  *
255  * Resume the delayed work timeout for the scheduler. Note that
256  * this function can be called from an IRQ context.
257  */
258 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
259                 unsigned long remaining)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&sched->job_list_lock, flags);
264
265         if (list_empty(&sched->ring_mirror_list))
266                 cancel_delayed_work(&sched->work_tdr);
267         else
268                 mod_delayed_work(system_wq, &sched->work_tdr, remaining);
269
270         spin_unlock_irqrestore(&sched->job_list_lock, flags);
271 }
272 EXPORT_SYMBOL(drm_sched_resume_timeout);
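
/*
 * Illustrative sketch of pairing drm_sched_suspend_timeout() with
 * drm_sched_resume_timeout() - foo_do_slow_hw_operation() is hypothetical
 * driver code: park the TDR timer around an operation that legitimately
 * stalls the hardware so the stall is not mistaken for a hang.
 */
static void foo_do_slow_hw_operation(struct drm_gpu_scheduler *sched)
{
	unsigned long remaining;

	remaining = drm_sched_suspend_timeout(sched);

	/* ... driver-specific slow operation, e.g. a power-state change ... */

	/* Re-arm the timer with whatever time the jobs still had left. */
	drm_sched_resume_timeout(sched, remaining);
}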
273
274 static void drm_sched_job_begin(struct drm_sched_job *s_job)
275 {
276         struct drm_gpu_scheduler *sched = s_job->sched;
277         unsigned long flags;
278
279         spin_lock_irqsave(&sched->job_list_lock, flags);
280         list_add_tail(&s_job->node, &sched->ring_mirror_list);
281         drm_sched_start_timeout(sched);
282         spin_unlock_irqrestore(&sched->job_list_lock, flags);
283 }
284
285 static void drm_sched_job_timedout(struct work_struct *work)
286 {
287         struct drm_gpu_scheduler *sched;
288         struct drm_sched_job *job;
289         unsigned long flags;
290
291         sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
292
293         /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
294         spin_lock_irqsave(&sched->job_list_lock, flags);
295         job = list_first_entry_or_null(&sched->ring_mirror_list,
296                                        struct drm_sched_job, node);
297
298         if (job) {
299                 /*
300                  * Remove the bad job so it cannot be freed by a concurrent
301                  * drm_sched_get_cleanup_job. It will be reinserted after sched->thread
302                  * is parked, at which point it's safe.
303                  */
304                 list_del_init(&job->node);
305                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
306
307                 job->sched->ops->timedout_job(job);
308
309                 /*
310                  * The guilty job did complete and hence needs to be removed manually.
311                  * See the drm_sched_stop() documentation.
312                  */
313                 if (sched->free_guilty) {
314                         job->sched->ops->free_job(job);
315                         sched->free_guilty = false;
316                 }
317         } else {
318                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
319         }
320
321         spin_lock_irqsave(&sched->job_list_lock, flags);
322         drm_sched_start_timeout(sched);
323         spin_unlock_irqrestore(&sched->job_list_lock, flags);
324 }
325
326 /**
327  * drm_sched_increase_karma - Update sched_entity guilty flag
328  *
329  * @bad: The job guilty of the timeout
330  *
331  * Increment the karma on every hang caused by the 'bad' job. If this exceeds
332  * the hang limit of the scheduler, the respective sched entity is marked
333  * guilty and jobs from it will no longer be scheduled.
334  */
335 void drm_sched_increase_karma(struct drm_sched_job *bad)
336 {
337         int i;
338         struct drm_sched_entity *tmp;
339         struct drm_sched_entity *entity;
340         struct drm_gpu_scheduler *sched = bad->sched;
341
342         /* Don't increase @bad's karma if it's from the KERNEL run queue,
343          * because a GPU hang can corrupt kernel jobs (like VM updates); keep
344          * in mind that kernel jobs are always considered good.
345          */
346         if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
347                 atomic_inc(&bad->karma);
348                 for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
349                      i++) {
350                         struct drm_sched_rq *rq = &sched->sched_rq[i];
351
352                         spin_lock(&rq->lock);
353                         list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
354                                 if (bad->s_fence->scheduled.context ==
355                                     entity->fence_context) {
356                                         if (atomic_read(&bad->karma) >
357                                             bad->sched->hang_limit)
358                                                 if (entity->guilty)
359                                                         atomic_set(entity->guilty, 1);
360                                         break;
361                                 }
362                         }
363                         spin_unlock(&rq->lock);
364                         if (&entity->list != &rq->entities)
365                                 break;
366                 }
367         }
368 }
369 EXPORT_SYMBOL(drm_sched_increase_karma);
370
371 /**
372  * drm_sched_stop - stop the scheduler
373  *
374  * @sched: scheduler instance
375  * @bad: job which caused the timeout
376  *
377  * Stop the scheduler and also remove and free all completed jobs.
378  * Note: the bad job will not be freed, as it might be used later, so it is
379  * the caller's responsibility to release it manually if it is no longer part
380  * of the mirror list.
381  *
382  */
383 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
384 {
385         struct drm_sched_job *s_job, *tmp;
386         unsigned long flags;
387
388         kthread_park(sched->thread);
389
390         /*
391          * Reinsert the bad job here - now it's safe as
392          * drm_sched_get_cleanup_job cannot race against us and release the
393          * bad job at this point - we parked (waited for) any in progress
394          * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
395          * now until the scheduler thread is unparked.
396          */
397         if (bad && bad->sched == sched)
398                 /*
399                  * Add at the head of the queue to reflect it was the earliest
400                  * job extracted.
401                  */
402                 list_add(&bad->node, &sched->ring_mirror_list);
403
404         /*
405          * Iterate the job list from the last to the first one and either deactivate
406          * their HW callbacks or remove them from the mirror list if they have
407          * already signaled.
408          * This iteration is thread safe as the sched thread is stopped.
409          */
410         list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
411                 if (s_job->s_fence->parent &&
412                     dma_fence_remove_callback(s_job->s_fence->parent,
413                                               &s_job->cb)) {
414                         atomic_dec(&sched->hw_rq_count);
415                 } else {
416                         /*
417                          * remove job from ring_mirror_list.
418                          * Locking here is for concurrent resume timeout
419                          */
420                         spin_lock_irqsave(&sched->job_list_lock, flags);
421                         list_del_init(&s_job->node);
422                         spin_unlock_irqrestore(&sched->job_list_lock, flags);
423
424                         /*
425                          * Wait for the job's HW fence callback to finish using s_job
426                          * before releasing it.
427                          *
428                          * The job is still alive, so the fence refcount is at least 1.
429                          */
430                         dma_fence_wait(&s_job->s_fence->finished, false);
431
432                         /*
433                          * We must keep bad job alive for later use during
434                          * recovery by some of the drivers but leave a hint
435                          * that the guilty job must be released.
436                          */
437                         if (bad != s_job)
438                                 sched->ops->free_job(s_job);
439                         else
440                                 sched->free_guilty = true;
441                 }
442         }
443
444         /*
445          * Stop the pending timer in flight as we rearm it in drm_sched_start. This
446          * avoids having the pending timeout work in progress fire right after
447          * this TDR finishes and before the newly restarted jobs have had a
448          * chance to complete.
449          */
450         cancel_delayed_work(&sched->work_tdr);
451 }
452
453 EXPORT_SYMBOL(drm_sched_stop);
454
455 /**
456  * drm_sched_start - recover jobs after a reset
457  *
458  * @sched: scheduler instance
459  * @full_recovery: proceed with complete sched restart
460  *
461  */
462 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
463 {
464         struct drm_sched_job *s_job, *tmp;
465         unsigned long flags;
466         int r;
467
468         /*
469          * Locking the list is not required here as the sched thread is parked
470          * so no new jobs are being inserted or removed. Also concurrent
471          * GPU recovers can't run in parallel.
472          */
473         list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
474                 struct dma_fence *fence = s_job->s_fence->parent;
475
476                 atomic_inc(&sched->hw_rq_count);
477
478                 if (!full_recovery)
479                         continue;
480
481                 if (fence) {
482                         r = dma_fence_add_callback(fence, &s_job->cb,
483                                                    drm_sched_process_job);
484                         if (r == -ENOENT)
485                                 drm_sched_process_job(fence, &s_job->cb);
486                         else if (r)
487                                 DRM_ERROR("fence add callback failed (%d)\n",
488                                           r);
489                 } else
490                         drm_sched_process_job(NULL, &s_job->cb);
491         }
492
493         if (full_recovery) {
494                 spin_lock_irqsave(&sched->job_list_lock, flags);
495                 drm_sched_start_timeout(sched);
496                 spin_unlock_irqrestore(&sched->job_list_lock, flags);
497         }
498
499         kthread_unpark(sched->thread);
500 }
501 EXPORT_SYMBOL(drm_sched_start);
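
/*
 * Illustrative sketch of the usual recovery sequence a driver runs from its
 * timedout_job callback, built from the helpers above. foo_do_gpu_recovery()
 * and the reset placeholder are hypothetical; the drm_sched_* calls are the
 * ones defined in this file.
 */
static void foo_do_gpu_recovery(struct drm_sched_job *bad)
{
	struct drm_gpu_scheduler *sched = bad->sched;

	drm_sched_stop(sched, bad);	/* park the thread, prune done jobs */
	drm_sched_increase_karma(bad);	/* mark the offending entity */

	/* ... driver-specific hardware reset goes here ... */

	drm_sched_resubmit_jobs(sched);	/* re-run the still-pending jobs */
	drm_sched_start(sched, true);	/* re-arm the timeout and unpark */
}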
502
503 /**
504  * drm_sched_resubmit_jobs - helper to relaunch jobs from the ring mirror list
505  *
506  * @sched: scheduler instance
507  *
508  */
509 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
510 {
511         struct drm_sched_job *s_job, *tmp;
512         uint64_t guilty_context;
513         bool found_guilty = false;
514         struct dma_fence *fence;
515
516         list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
517                 struct drm_sched_fence *s_fence = s_job->s_fence;
518
519                 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
520                         found_guilty = true;
521                         guilty_context = s_job->s_fence->scheduled.context;
522                 }
523
524                 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
525                         dma_fence_set_error(&s_fence->finished, -ECANCELED);
526
527                 dma_fence_put(s_job->s_fence->parent);
528                 fence = sched->ops->run_job(s_job);
529
530                 if (IS_ERR_OR_NULL(fence)) {
531                         if (IS_ERR(fence))
532                                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
533
534                         s_job->s_fence->parent = NULL;
535                 } else {
536                         s_job->s_fence->parent = fence;
537                 }
538
539
540         }
541 }
542 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
543
544 /**
545  * drm_sched_job_init - init a scheduler job
546  *
547  * @job: scheduler job to init
548  * @entity: scheduler entity to use
549  * @owner: job owner for debugging
550  *
551  * Refer to drm_sched_entity_push_job() documentation
552  * for locking considerations.
553  *
554  * Returns 0 for success, negative error code otherwise.
555  */
556 int drm_sched_job_init(struct drm_sched_job *job,
557                        struct drm_sched_entity *entity,
558                        void *owner)
559 {
560         struct drm_gpu_scheduler *sched;
561
562         drm_sched_entity_select_rq(entity);
563         if (!entity->rq)
564                 return -ENOENT;
565
566         sched = entity->rq->sched;
567
568         job->sched = sched;
569         job->entity = entity;
570         job->s_priority = entity->rq - sched->sched_rq;
571         job->s_fence = drm_sched_fence_create(entity, owner);
572         if (!job->s_fence)
573                 return -ENOMEM;
574         job->id = atomic64_inc_return(&sched->job_id_count);
575
576         INIT_LIST_HEAD(&job->node);
577
578         return 0;
579 }
580 EXPORT_SYMBOL(drm_sched_job_init);
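
/*
 * Illustrative sketch of submitting a job - struct foo_job and
 * foo_submit_job() are hypothetical driver code, and the two-argument
 * drm_sched_entity_push_job() is assumed as declared in
 * <drm/gpu_scheduler.h>.
 */
struct foo_job {
	struct drm_sched_job base;	/* scheduler bookkeeping for this job */
	/* ... driver-specific command stream data ... */
};

static int foo_submit_job(struct foo_job *job, struct drm_sched_entity *entity,
			  void *owner)
{
	int r;

	r = drm_sched_job_init(&job->base, entity, owner);
	if (r)
		return r;

	/* From here on the scheduler owns the job's lifetime. */
	drm_sched_entity_push_job(&job->base, entity);
	return 0;
}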
581
582 /**
583  * drm_sched_job_cleanup - clean up scheduler job resources
584  *
585  * @job: scheduler job to clean up
586  */
587 void drm_sched_job_cleanup(struct drm_sched_job *job)
588 {
589         dma_fence_put(&job->s_fence->finished);
590         job->s_fence = NULL;
591 }
592 EXPORT_SYMBOL(drm_sched_job_cleanup);
593
594 /**
595  * drm_sched_ready - is the scheduler ready
596  *
597  * @sched: scheduler instance
598  *
599  * Return true if we can push more jobs to the hw, otherwise false.
600  */
601 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
602 {
603         return atomic_read(&sched->hw_rq_count) <
604                 sched->hw_submission_limit;
605 }
606
607 /**
608  * drm_sched_wakeup - Wake up the scheduler when it is ready
609  *
610  * @sched: scheduler instance
611  *
612  */
613 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
614 {
615         if (drm_sched_ready(sched))
616                 wake_up_interruptible(&sched->wake_up_worker);
617 }
618
619 /**
620  * drm_sched_select_entity - Select next entity to process
621  *
622  * @sched: scheduler instance
623  *
624  * Returns the entity to process or NULL if none are found.
625  */
626 static struct drm_sched_entity *
627 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
628 {
629         struct drm_sched_entity *entity;
630         int i;
631
632         if (!drm_sched_ready(sched))
633                 return NULL;
634
635         /* Kernel run queue has higher priority than normal run queue */
636         for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
637                 entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
638                 if (entity)
639                         break;
640         }
641
642         return entity;
643 }
644
645 /**
646  * drm_sched_process_job - process a job
647  *
648  * @f: fence
649  * @cb: fence callbacks
650  *
651  * Called after job has finished execution.
652  */
653 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
654 {
655         struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
656         struct drm_sched_fence *s_fence = s_job->s_fence;
657         struct drm_gpu_scheduler *sched = s_fence->sched;
658
659         atomic_dec(&sched->hw_rq_count);
660         atomic_dec(&sched->score);
661
662         trace_drm_sched_process_job(s_fence);
663
664         drm_sched_fence_finished(s_fence);
665         wake_up_interruptible(&sched->wake_up_worker);
666 }
667
668 /**
669  * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
670  *
671  * @sched: scheduler instance
672  *
673  * Returns the next finished job from the mirror list (if there is one)
674  * ready to be destroyed.
675  */
676 static struct drm_sched_job *
677 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
678 {
679         struct drm_sched_job *job;
680         unsigned long flags;
681
682         /*
683          * Don't destroy jobs while the timeout worker is running OR while the
684          * thread is being parked and hence assumed to not touch ring_mirror_list.
685          */
686         if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
687             !cancel_delayed_work(&sched->work_tdr)) ||
688             __kthread_should_park(sched->thread))
689                 return NULL;
690
691         spin_lock_irqsave(&sched->job_list_lock, flags);
692
693         job = list_first_entry_or_null(&sched->ring_mirror_list,
694                                        struct drm_sched_job, node);
695
696         if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
697                 /* remove job from ring_mirror_list */
698                 list_del_init(&job->node);
699         } else {
700                 job = NULL;
701                 /* queue timeout for next job */
702                 drm_sched_start_timeout(sched);
703         }
704
705         spin_unlock_irqrestore(&sched->job_list_lock, flags);
706
707         return job;
708 }
709
710 /**
711  * drm_sched_blocked - check if the scheduler is blocked
712  *
713  * @sched: scheduler instance
714  *
715  * Returns true if blocked, otherwise false.
716  */
717 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
718 {
719         if (kthread_should_park()) {
720                 kthread_parkme();
721                 return true;
722         }
723
724         return false;
725 }
726
727 /**
728  * drm_sched_main - main scheduler thread
729  *
730  * @param: scheduler instance
731  *
732  * Returns 0.
733  */
734 static int drm_sched_main(void *param)
735 {
736         struct sched_param sparam = {.sched_priority = 1};
737         struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
738         int r;
739
740         sched_setscheduler(current, SCHED_FIFO, &sparam);
741
742         while (!kthread_should_stop()) {
743                 struct drm_sched_entity *entity = NULL;
744                 struct drm_sched_fence *s_fence;
745                 struct drm_sched_job *sched_job;
746                 struct dma_fence *fence;
747                 struct drm_sched_job *cleanup_job = NULL;
748
749                 wait_event_interruptible(sched->wake_up_worker,
750                                          (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
751                                          (!drm_sched_blocked(sched) &&
752                                           (entity = drm_sched_select_entity(sched))) ||
753                                          kthread_should_stop());
754
755                 if (cleanup_job) {
756                         sched->ops->free_job(cleanup_job);
757                         /* queue timeout for next job */
758                         drm_sched_start_timeout(sched);
759                 }
760
761                 if (!entity)
762                         continue;
763
764                 sched_job = drm_sched_entity_pop_job(entity);
765
766                 complete(&entity->entity_idle);
767
768                 if (!sched_job)
769                         continue;
770
771                 s_fence = sched_job->s_fence;
772
773                 atomic_inc(&sched->hw_rq_count);
774                 drm_sched_job_begin(sched_job);
775
776                 fence = sched->ops->run_job(sched_job);
777                 drm_sched_fence_scheduled(s_fence);
778
779                 if (!IS_ERR_OR_NULL(fence)) {
780                         s_fence->parent = dma_fence_get(fence);
781                         r = dma_fence_add_callback(fence, &sched_job->cb,
782                                                    drm_sched_process_job);
783                         if (r == -ENOENT)
784                                 drm_sched_process_job(fence, &sched_job->cb);
785                         else if (r)
786                                 DRM_ERROR("fence add callback failed (%d)\n",
787                                           r);
788                         dma_fence_put(fence);
789                 } else {
790                         if (IS_ERR(fence))
791                                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
792
793                         drm_sched_process_job(NULL, &sched_job->cb);
794                 }
795
796                 wake_up(&sched->job_scheduled);
797         }
798         return 0;
799 }
800
801 /**
802  * drm_sched_init - Init a gpu scheduler instance
803  *
804  * @sched: scheduler instance
805  * @ops: backend operations for this scheduler
806  * @hw_submission: number of hw submissions that can be in flight
807  * @hang_limit: number of times to allow a job to hang before dropping it
808  * @timeout: timeout value in jiffies for the scheduler
809  * @name: name used for debugging
810  *
811  * Return 0 on success, otherwise error code.
812  */
813 int drm_sched_init(struct drm_gpu_scheduler *sched,
814                    const struct drm_sched_backend_ops *ops,
815                    unsigned hw_submission,
816                    unsigned hang_limit,
817                    long timeout,
818                    const char *name)
819 {
820         int i, ret;
821         sched->ops = ops;
822         sched->hw_submission_limit = hw_submission;
823         sched->name = name;
824         sched->timeout = timeout;
825         sched->hang_limit = hang_limit;
826         for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
827                 drm_sched_rq_init(sched, &sched->sched_rq[i]);
828
829         init_waitqueue_head(&sched->wake_up_worker);
830         init_waitqueue_head(&sched->job_scheduled);
831         INIT_LIST_HEAD(&sched->ring_mirror_list);
832         spin_lock_init(&sched->job_list_lock);
833         atomic_set(&sched->hw_rq_count, 0);
834         INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
835         atomic_set(&sched->score, 0);
836         atomic64_set(&sched->job_id_count, 0);
837
838         /* Each scheduler will run on a separate kernel thread */
839         sched->thread = kthread_run(drm_sched_main, sched, sched->name);
840         if (IS_ERR(sched->thread)) {
841                 ret = PTR_ERR(sched->thread);
842                 sched->thread = NULL;
843                 DRM_ERROR("Failed to create scheduler for %s.\n", name);
844                 return ret;
845         }
846
847         sched->ready = true;
848         return 0;
849 }
850 EXPORT_SYMBOL(drm_sched_init);
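
/*
 * Illustrative sketch of creating one scheduler per hardware ring at driver
 * load time - struct foo_ring and foo_ring_sched_init() are hypothetical,
 * and foo_sched_ops refers to the backend ops sketched below the overview.
 */
struct foo_ring {
	struct drm_gpu_scheduler sched;
	const char *name;
};

static int foo_ring_sched_init(struct foo_ring *ring)
{
	/* e.g. up to 64 jobs in flight, 3 hangs tolerated, 2 second timeout */
	return drm_sched_init(&ring->sched, &foo_sched_ops, 64, 3,
			      msecs_to_jiffies(2000), ring->name);
}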
851
852 /**
853  * drm_sched_fini - Destroy a gpu scheduler
854  *
855  * @sched: scheduler instance
856  *
857  * Tears down and cleans up the scheduler.
858  */
859 void drm_sched_fini(struct drm_gpu_scheduler *sched)
860 {
861         if (sched->thread)
862                 kthread_stop(sched->thread);
863
864         sched->ready = false;
865 }
866 EXPORT_SYMBOL(drm_sched_fini);