/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/* Initialize a given run queue struct */
static void drm_sched_rq_init(struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
}

static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
				    struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				       struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * Select an entity which could provide a job to run
 *
 * @rq: The run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {
		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);
	return NULL;
}

/**
 * Initialize a context entity, used by the scheduler when submitting to a HW ring.
 *
 * @sched: Pointer to the scheduler
 * @entity: The pointer to a valid drm_sched_entity
 * @rq: The run queue this entity belongs to
 * @jobs: The max number of jobs in the job queue
 * @guilty: atomic_t set to 1 when a job on this queue
 *          is found to be guilty causing a timeout
 *
 * Returns 0 on success, negative error code on failure.
 */
int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
			  struct drm_sched_entity *entity,
			  struct drm_sched_rq *rq,
			  uint32_t jobs, atomic_t *guilty)
{
	if (!(sched && entity && rq))
		return -EINVAL;

	memset(entity, 0, sizeof(struct drm_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = rq;
	entity->sched = sched;
	entity->guilty = guilty;
	entity->fini_status = 0;
	entity->last_scheduled = NULL;

	spin_lock_init(&entity->rq_lock);
	spin_lock_init(&entity->queue_lock);
	spsc_queue_init(&entity->job_queue);

	atomic_set(&entity->fence_seq, 0);
	entity->fence_context = dma_fence_context_alloc(2);

	return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);

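/*
 * Illustrative sketch, not part of the scheduler itself: how a driver might
 * set up one entity per context. The names my_ctx and my_ctx_init are
 * assumptions standing in for driver-specific code.
 *
 *	struct my_ctx {
 *		struct drm_sched_entity entity;
 *	};
 *
 *	static int my_ctx_init(struct my_ctx *ctx, struct drm_gpu_scheduler *sched,
 *			       enum drm_sched_priority prio)
 *	{
 *		struct drm_sched_rq *rq = &sched->sched_rq[prio];
 *
 *		// Passing a NULL guilty pointer: jobs from this entity are
 *		// never skipped after a hang.
 *		return drm_sched_entity_init(sched, &ctx->entity, rq, 32, NULL);
 *	}
 */
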
/**
 * Query if entity is initialized
 *
 * @sched: Pointer to scheduler instance
 * @entity: The pointer to a valid scheduler entity
 *
 * Returns true if the entity is initialized, false otherwise.
 */
static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
					    struct drm_sched_entity *entity)
{
	return entity->sched == sched &&
		entity->rq != NULL;
}

/**
 * Check if entity is idle
 *
 * @entity: The pointer to a valid scheduler entity
 *
 * Returns true if the entity doesn't have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
	rmb();

	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return true;

	return false;
}

/**
 * Check if entity is ready
 *
 * @entity: The pointer to a valid scheduler entity
 *
 * Returns true if the entity could provide a job.
 */
static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return false;

	if (READ_ONCE(entity->dependency))
		return false;

	return true;
}

static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
					  struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	drm_sched_fence_finished(job->s_fence);
	WARN_ON(job->s_fence->parent);
	dma_fence_put(&job->s_fence->finished);
	job->sched->ops->free_job(job);
}

/**
 * Destroy a context entity
 *
 * @sched: Pointer to scheduler instance
 * @entity: The pointer to a valid scheduler entity
 *
 * Splitting drm_sched_entity_fini() into two functions, the first one does the
 * waiting, removes the entity from the runqueue and returns an error when the
 * process was killed.
 */
void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
				 struct drm_sched_entity *entity)
{
	if (!drm_sched_entity_is_initialized(sched, entity))
		return;
	/*
	 * The client will not queue more IBs during this fini; consume existing
	 * queued IBs or discard them on SIGKILL.
	 */
	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
		entity->fini_status = -ERESTARTSYS;
	else
		entity->fini_status = wait_event_killable(sched->job_scheduled,
					drm_sched_entity_is_idle(entity));
	drm_sched_entity_set_rq(entity, NULL);
}
EXPORT_SYMBOL(drm_sched_entity_do_release);

/**
 * Destroy a context entity
 *
 * @sched: Pointer to scheduler instance
 * @entity: The pointer to a valid scheduler entity
 *
 * The second one then goes over the entity and signals all jobs with an error code.
 */
void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
			      struct drm_sched_entity *entity)
{
	if (entity->fini_status) {
		struct drm_sched_job *job;
		int r;

		/* Park the kernel thread for a moment to make sure it isn't
		 * processing our entity.
		 */
		kthread_park(sched->thread);
		kthread_unpark(sched->thread);
		if (entity->dependency) {
			dma_fence_remove_callback(entity->dependency,
						  &entity->cb);
			dma_fence_put(entity->dependency);
			entity->dependency = NULL;
		}

		while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
			struct drm_sched_fence *s_fence = job->s_fence;

			drm_sched_fence_scheduled(s_fence);
			dma_fence_set_error(&s_fence->finished, -ESRCH);
			r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb,
						   drm_sched_entity_kill_jobs_cb);
			if (r == -ENOENT)
				drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n", r);
		}
	}

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = NULL;
}
EXPORT_SYMBOL(drm_sched_entity_cleanup);

void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
			   struct drm_sched_entity *entity)
{
	drm_sched_entity_do_release(sched, entity);
	drm_sched_entity_cleanup(sched, entity);
}
EXPORT_SYMBOL(drm_sched_entity_fini);

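/*
 * Illustrative teardown sketch (an assumption, not taken from this file): a
 * driver that wants to do its own cleanup between the "wait" and the "kill
 * leftover jobs" steps can call the two halves directly instead of
 * drm_sched_entity_fini(). my_ctx/my_ctx_fini are hypothetical names.
 *
 *	static void my_ctx_fini(struct my_ctx *ctx, struct drm_gpu_scheduler *sched)
 *	{
 *		// May block until the entity is idle (or the process is killed)
 *		drm_sched_entity_do_release(sched, &ctx->entity);
 *
 *		// ... driver specific cleanup could go here ...
 *
 *		// Signals any remaining jobs with an error and frees them
 *		drm_sched_entity_cleanup(sched, &ctx->entity);
 *	}
 */
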
static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
	drm_sched_wakeup(entity->sched);
}

static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
}

void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
			     struct drm_sched_rq *rq)
{
	if (entity->rq == rq)
		return;

	spin_lock(&entity->rq_lock);
	if (entity->rq)
		drm_sched_rq_remove_entity(entity->rq, entity);

	entity->rq = rq;
	if (rq)
		drm_sched_rq_add_entity(rq, entity);
	spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_rq);

bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

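/*
 * Illustrative note (an assumption about intended use): when this returns
 * true, the scheduler's own dependency handling will treat the fence as
 * resolved once the other job has merely been *scheduled* (see
 * drm_sched_entity_add_dependency_cb() below), so a driver that requires the
 * dependency to have *finished* on the hardware may need to emit its own wait:
 *
 *	if (drm_sched_dependency_optimized(fence, entity))
 *		my_emit_hw_wait(job, fence);	// hypothetical driver helper
 */
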
static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct dma_fence *fence = entity->dependency;
	struct drm_sched_fence *s_fence;

	if (fence->context == entity->fence_context) {
		/* We can ignore fences from ourself */
		dma_fence_put(entity->dependency);
		entity->dependency = NULL;
		return false;
	}

	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {
		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled.
		 */
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
				    drm_sched_entity_wakeup))
		return true;

	dma_fence_put(entity->dependency);
	return false;
}

static struct drm_sched_job *
drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_job *sched_job = to_drm_sched_job(
					spsc_queue_peek(&entity->job_queue));

	if (!sched_job)
		return NULL;

	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
		if (drm_sched_entity_add_dependency_cb(entity))
			return NULL;

	/* skip jobs from an entity that was marked guilty */
	if (entity->guilty && atomic_read(entity->guilty))
		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

	spsc_queue_pop(&entity->job_queue);
	return sched_job;
}

/**
 * Submit a job to the job queue
 *
 * @sched_job: The pointer to the job to submit
 * @entity: The entity to queue the job on
 *
 * Queues the job and, if this is the first job on an otherwise idle entity,
 * adds the entity to its run queue and wakes up the scheduler.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	bool first;

	trace_drm_sched_job(sched_job, entity);

	spin_lock(&entity->queue_lock);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
	spin_unlock(&entity->queue_lock);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(sched);
	}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);

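/*
 * Illustrative submission sketch (an assumption, not from this file): a driver
 * typically initializes the job against an entity and then pushes it; the
 * scheduler thread picks it up once its dependencies are resolved. my_submit
 * is a hypothetical name.
 *
 *	static int my_submit(struct drm_gpu_scheduler *sched,
 *			     struct drm_sched_entity *entity,
 *			     struct drm_sched_job *job, void *owner)
 *	{
 *		int r;
 *
 *		r = drm_sched_job_init(job, sched, entity, owner);
 *		if (r)
 *			return r;
 *
 *		// From here on the scheduler owns the job's lifetime
 *		drm_sched_entity_push_job(job, entity);
 *		return 0;
 *	}
 */
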
/* job_finish is called after the hw fence signaled */
static void drm_sched_job_finish(struct work_struct *work)
{
	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
						   finish_work);
	struct drm_gpu_scheduler *sched = s_job->sched;

	/* remove job from ring_mirror_list */
	spin_lock(&sched->job_list_lock);
	list_del_init(&s_job->node);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
		struct drm_sched_job *next;

		spin_unlock(&sched->job_list_lock);
		cancel_delayed_work_sync(&s_job->work_tdr);
		spin_lock(&sched->job_list_lock);

		/* queue TDR for next job */
		next = list_first_entry_or_null(&sched->ring_mirror_list,
						struct drm_sched_job, node);
		if (next)
			schedule_delayed_work(&next->work_tdr, sched->timeout);
	}
	spin_unlock(&sched->job_list_lock);
	dma_fence_put(&s_job->s_fence->finished);
	sched->ops->free_job(s_job);
}

static void drm_sched_job_finish_cb(struct dma_fence *f,
				    struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	schedule_work(&job->finish_work);
}

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
			       drm_sched_job_finish_cb);

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    list_first_entry_or_null(&sched->ring_mirror_list,
				     struct drm_sched_job, node) == s_job)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
						 work_tdr.work);

	job->sched->ops->timedout_job(job);
}

void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job;
	struct drm_sched_entity *entity, *tmp;
	int i;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->s_fence->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_dec(&sched->hw_rq_count);
		}
	}
	spin_unlock(&sched->job_list_lock);

	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		/* don't increase @bad's karma if it's from the KERNEL RQ,
		 * because a GPU hang can sometimes corrupt kernel jobs (like
		 * VM updating jobs); kernel jobs are always considered good.
		 */
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context == entity->fence_context) {
					if (atomic_read(&bad->karma) > bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_hw_job_reset);

void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	bool found_guilty = false;
	int r;

	spin_lock(&sched->job_list_lock);
	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
					 struct drm_sched_job, node);
	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;
		struct dma_fence *fence;
		uint64_t guilty_context;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		spin_unlock(&sched->job_list_lock);
		fence = sched->ops->run_job(s_job);
		atomic_inc(&sched->hw_rq_count);

		dma_fence_put(s_job->entity->last_scheduled);
		s_job->entity->last_scheduled = dma_fence_get(&s_fence->finished);

		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}
		spin_lock(&sched->job_list_lock);
	}
	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_job_recovery);

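/*
 * Illustrative recovery sketch (an assumption modelled on typical driver use):
 * a timedout_job callback usually parks the scheduler thread, detaches the
 * hardware fences, resets the hardware and then resubmits the pending jobs.
 * my_timedout_job/my_hw_reset are hypothetical names.
 *
 *	static void my_timedout_job(struct drm_sched_job *bad_job)
 *	{
 *		struct drm_gpu_scheduler *sched = bad_job->sched;
 *
 *		kthread_park(sched->thread);
 *		drm_sched_hw_job_reset(sched, bad_job);
 *
 *		my_hw_reset();				// driver specific reset
 *
 *		drm_sched_job_recovery(sched);		// resubmit ring_mirror_list
 *		kthread_unpark(sched->thread);
 *	}
 */
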
/* init a sched_job with basic fields */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_gpu_scheduler *sched,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_WORK(&job->finish_work, drm_sched_job_finish);
	INIT_LIST_HEAD(&job->node);
	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

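/*
 * Illustrative embedding sketch (an assumption): drivers usually wrap
 * struct drm_sched_job in their own job structure so the backend callbacks
 * can recover the driver job with container_of(). my_job/my_run_job/
 * my_hw_submit are hypothetical names.
 *
 *	struct my_job {
 *		struct drm_sched_job	base;
 *		// driver specific payload (command buffers, sync objects, ...)
 *	};
 *
 *	static struct dma_fence *my_run_job(struct drm_sched_job *sched_job)
 *	{
 *		struct my_job *job = container_of(sched_job, struct my_job, base);
 *
 *		return my_hw_submit(job);	// returns the hardware fence
 *	}
 */
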
/**
 * Return true if we can push more jobs to the hw.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * Wake up the scheduler when it is ready
 */
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * Select next entity to process
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_fence *s_fence =
		container_of(cb, struct drm_sched_fence, cb);
	struct drm_gpu_scheduler *sched = s_fence->sched;

	dma_fence_get(&s_fence->finished);
	atomic_dec(&sched->hw_rq_count);
	drm_sched_fence_finished(s_fence);

	trace_drm_sched_process_job(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

static int drm_sched_main(void *param)
{
	struct sched_param sparam = {.sched_priority = 1};
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;

		wait_event_interruptible(sched->wake_up_worker,
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);
		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		dma_fence_put(entity->last_scheduled);
		entity->last_scheduled = dma_fence_get(&s_fence->finished);

		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * Init a gpu scheduler instance
 *
 * @sched: The pointer to the scheduler
 * @ops: The backend operations for this scheduler.
 * @hw_submission: Maximum number of hw submissions that can be in flight.
 * @hang_limit: Number of times to allow a job to hang before dropping it.
 * @timeout: Timeout value in jiffies for the scheduler.
 * @name: Name used for debugging
 *
 * Returns 0 on success, otherwise an error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
		drm_sched_rq_init(&sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return PTR_ERR(sched->thread);
	}

	return 0;
}
EXPORT_SYMBOL(drm_sched_init);

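/*
 * Illustrative init sketch (an assumption): a driver creates one scheduler
 * instance per hardware ring, passing its backend callbacks. my_ring,
 * my_sched_ops and the numeric limits are hypothetical driver choices.
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.dependency	= my_dependency,
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	static int my_ring_init(struct my_ring *ring)
 *	{
 *		return drm_sched_init(&ring->sched, &my_sched_ops,
 *				      16,			// hw_submission
 *				      3,			// hang_limit
 *				      msecs_to_jiffies(10000),	// timeout
 *				      ring->name);
 *	}
 */
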
/**
 * Destroy a gpu scheduler
 *
 * @sched: The pointer to the scheduler
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);
}
EXPORT_SYMBOL(drm_sched_fini);