2 * Copyright 2015 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/kthread.h>
25 #include <linux/wait.h>
26 #include <linux/sched.h>
27 #include <uapi/linux/sched/types.h>
29 #include <drm/gpu_scheduler.h>
30 #include <drm/spsc_queue.h>
32 #define CREATE_TRACE_POINTS
33 #include "gpu_scheduler_trace.h"
/*
 * Convert a pointer to a queue node into a pointer to the
 * struct drm_sched_job that embeds it as its queue_node member.
 */
35 #define to_drm_sched_job(sched_job) \
36 container_of((sched_job), struct drm_sched_job, queue_node)
/*
 * Forward declarations of static helpers that are called in this file
 * before their definitions appear.
 */
38 static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
39 static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
40 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
/*
 * Prepare @rq for use: initialise its spinlock, start with an empty
 * entity list and no remembered current entity.
 */
42 /* Initialize a given run queue struct */
43 static void drm_sched_rq_init(struct drm_sched_rq *rq)
45 spin_lock_init(&rq->lock);
46 INIT_LIST_HEAD(&rq->entities);
47 rq->current_entity = NULL;
/*
 * Append @entity to the tail of the entity list of @rq.
 *
 * NOTE(review): the statement controlled by the list_empty() test and the
 * acquisition of rq->lock are not visible in this listing; presumably an
 * already linked entity is left alone - confirm against the full file.
 */
50 static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
51 struct drm_sched_entity *entity)
/* A non-empty entity->list means the entity is already linked somewhere. */
53 if (!list_empty(&entity->list))
56 list_add_tail(&entity->list, &rq->entities);
57 spin_unlock(&rq->lock);
/*
 * Unlink @entity from @rq.
 *
 * NOTE(review): the statement controlled by the list_empty() test and the
 * acquisition of rq->lock are not visible in this listing.
 */
60 static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
61 struct drm_sched_entity *entity)
/* An empty entity->list means the entity is not linked on any list. */
63 if (list_empty(&entity->list))
/* list_del_init() leaves entity->list re-initialised after unlinking. */
66 list_del_init(&entity->list);
/* Do not leave the run queue pointing at an entity that was just removed. */
67 if (rq->current_entity == entity)
68 rq->current_entity = NULL;
69 spin_unlock(&rq->lock);
73 * Select an entity which could provide a job to run
75 * @rq The run queue to check.
77 * Try to find a ready entity, returns NULL if none found.
79 static struct drm_sched_entity *
80 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
82 struct drm_sched_entity *entity;
/* Start from the entity that was picked by the previous call, if any. */
86 entity = rq->current_entity;
/* First pass: continue the walk with the entries after that entity. */
88 list_for_each_entry_continue(entity, &rq->entities, list) {
89 if (drm_sched_entity_is_ready(entity)) {
/* Remember the pick so that the next call resumes after it. */
90 rq->current_entity = entity;
91 spin_unlock(&rq->lock);
/* Second pass: walk the list again, this time from its head. */
97 list_for_each_entry(entity, &rq->entities, list) {
99 if (drm_sched_entity_is_ready(entity)) {
100 rq->current_entity = entity;
101 spin_unlock(&rq->lock);
/*
 * The walk has come back to the remembered entity. The statement this
 * test controls is not visible in this listing (presumably it ends the
 * scan - confirm).
 */
105 if (entity == rq->current_entity)
/* No ready entity was found; the lock is released on this path as well. */
109 spin_unlock(&rq->lock);
115 * Init a context entity used by scheduler when submit to HW ring.
117 * @sched The pointer to the scheduler
118 * @entity The pointer to a valid drm_sched_entity
119 * @rq The run queue this entity belongs to
120 * @guilty atomic_t set to 1 when a job on this queue
121 * is found to be guilty causing a timeout
123 * return 0 if succeed. negative error code on failure
/*
 * NOTE(review): the end of the parameter list (including the guilty
 * argument used below), the return statements and any assignment of
 * entity->rq are not visible in this listing.
 */
125 int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
126 struct drm_sched_entity *entity,
127 struct drm_sched_rq *rq,
/* All three pointers are required. */
130 if (!(sched && entity && rq))
/* Start from an all-zero entity before filling in individual fields. */
133 memset(entity, 0, sizeof(struct drm_sched_entity));
134 INIT_LIST_HEAD(&entity->list);
136 entity->sched = sched;
137 entity->guilty = guilty;
138 entity->fini_status = 0;
139 entity->last_scheduled = NULL;
141 spin_lock_init(&entity->rq_lock);
142 spsc_queue_init(&entity->job_queue);
144 atomic_set(&entity->fence_seq, 0);
/*
 * Reserve two fence context numbers. Elsewhere in this file both
 * fence_context and fence_context + 1 are treated as belonging to
 * this entity.
 */
145 entity->fence_context = dma_fence_context_alloc(2);
149 EXPORT_SYMBOL(drm_sched_entity_init);
152 * Query if entity is initialized
154 * @sched Pointer to scheduler instance
155 * @entity The pointer to a valid scheduler entity
157 * return true if entity is initialized, false otherwise
159 static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
160 struct drm_sched_entity *entity)
/*
 * First condition: the entity is bound to this scheduler. The rest of the
 * expression is on a line that is not visible in this listing.
 */
162 return entity->sched == sched &&
167 * Check if entity is idle
169 * @entity The pointer to a valid scheduler entity
171 * Return true if the entity has no unscheduled jobs.
173 static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
/*
 * A NULL head of the job queue means nothing is queued on this entity.
 * The return statements are not visible in this listing.
 */
176 if (spsc_queue_peek(&entity->job_queue) == NULL)
183 * Check if entity is ready
185 * @entity The pointer to a valid scheduler entity
187 * Return true if entity could provide a job.
189 static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
/* An entity with an empty job queue has nothing to offer. */
191 if (spsc_queue_peek(&entity->job_queue) == NULL)
/*
 * A dependency that is still set is checked next; READ_ONCE is used because
 * the fence callbacks in this file clear the pointer asynchronously. The
 * return statements are not visible in this listing.
 */
194 if (READ_ONCE(entity->dependency))
/*
 * Fence callback used by drm_sched_entity_cleanup() for jobs that are
 * discarded without ever being run. It is also called directly from that
 * function with a NULL fence.
 *
 * NOTE(review): the member name passed to container_of() is on a line that
 * is not visible in this listing.
 */
200 static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
201 struct dma_fence_cb *cb)
203 struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
205 drm_sched_fence_finished(job->s_fence);
/* A job that never ran is not expected to have a parent fence attached. */
206 WARN_ON(job->s_fence->parent);
207 dma_fence_put(&job->s_fence->finished);
/* Hand the job back to the backend for freeing. */
208 job->sched->ops->free_job(job);
213 * Destroy a context entity
215 * @sched Pointer to scheduler instance
216 * @entity The pointer to a valid scheduler entity
218 * drm_sched_entity_fini() is split into two functions. This first one does the waiting,
219 * removes the entity from the run queue and records an error in entity->fini_status when the process was killed.
221 void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
222 struct drm_sched_entity *entity)
/*
 * Nothing to do for an entity that was never initialised for this
 * scheduler (the controlled statement is not visible in this listing).
 */
224 if (!drm_sched_entity_is_initialized(sched, entity))
227 * The client will not queue more IBs during this fini, consume existing
228 * queued IBs or discard them on SIGKILL
/* The calling task is already dying from SIGKILL: record an error, no wait. */
230 if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
231 entity->fini_status = -ERESTARTSYS;
/*
 * Wait on the job_scheduled queue until the job queue of the entity has
 * drained, and store the result of the wait. NOTE(review): the keyword
 * joining this to the test above is not visible here; presumably an else.
 */
233 entity->fini_status = wait_event_killable(sched->job_scheduled,
234 drm_sched_entity_is_idle(entity));
/* Passing a NULL run queue detaches the entity from its current one. */
235 drm_sched_entity_set_rq(entity, NULL);
237 EXPORT_SYMBOL(drm_sched_entity_do_release);
240 * Destroy a context entity
242 * @sched Pointer to scheduler instance
243 * @entity The pointer to a valid scheduler entity
245 * The second one then goes over the entity and signals all jobs with an error code.
/*
 * When entity->fini_status is non-zero, the visible steps are:
 *  - park and immediately unpark the scheduler thread;
 *  - if a dependency is still pending, remove its callback, drop the
 *    fence reference and clear the pointer;
 *  - pop every job left in the queue, call drm_sched_fence_scheduled() on
 *    its fence, set -ESRCH on the finished fence and register
 *    drm_sched_entity_kill_jobs_cb on entity->last_scheduled. That callback
 *    is also invoked directly on a branch whose condition is not visible in
 *    this listing, and a failure to add the callback is logged.
 * The listing ends by dropping the last_scheduled reference and clearing it.
 *
 * NOTE(review): the declaration of r, the branch conditions around the
 * callback registration and the closing braces are not visible here, so
 * the exact nesting of the final two statements cannot be confirmed.
 */
247 void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
248 struct drm_sched_entity *entity)
250 if (entity->fini_status) {
251 struct drm_sched_job *job;
254 /* Park the kernel for a moment to make sure it isn't processing
257 kthread_park(sched->thread);
258 kthread_unpark(sched->thread);
259 if (entity->dependency) {
260 dma_fence_remove_callback(entity->dependency,
262 dma_fence_put(entity->dependency);
263 entity->dependency = NULL;
266 while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
267 struct drm_sched_fence *s_fence = job->s_fence;
268 drm_sched_fence_scheduled(s_fence);
269 dma_fence_set_error(&s_fence->finished, -ESRCH);
270 r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb,
271 drm_sched_entity_kill_jobs_cb);
273 drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
275 DRM_ERROR("fence add callback failed (%d)\n", r);
279 dma_fence_put(entity->last_scheduled);
280 entity->last_scheduled = NULL;
282 EXPORT_SYMBOL(drm_sched_entity_cleanup);
/*
 * Tear down @entity in one call: run drm_sched_entity_do_release() and then
 * drm_sched_entity_cleanup() on the same scheduler and entity.
 */
284 void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
285 struct drm_sched_entity *entity)
287 drm_sched_entity_do_release(sched, entity);
288 drm_sched_entity_cleanup(sched, entity);
290 EXPORT_SYMBOL(drm_sched_entity_fini);
/*
 * Fence callback installed by drm_sched_entity_add_dependency_cb() for
 * fences that do not come from this scheduler. It clears the dependency of
 * the entity and asks the scheduler to wake up.
 */
292 static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
/* The callback structure is embedded in the entity as its cb member. */
294 struct drm_sched_entity *entity =
295 container_of(cb, struct drm_sched_entity, cb);
296 entity->dependency = NULL;
298 drm_sched_wakeup(entity->sched);
/*
 * Fence callback installed by drm_sched_entity_add_dependency_cb() for
 * fences from the same scheduler. The only visible effect is clearing
 * entity->dependency; unlike drm_sched_entity_wakeup() no wakeup call is
 * visible here.
 */
301 static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
303 struct drm_sched_entity *entity =
304 container_of(cb, struct drm_sched_entity, cb);
305 entity->dependency = NULL;
/*
 * Move @entity from its current run queue to @rq. Callers in this file also
 * pass a NULL @rq to detach the entity.
 *
 * NOTE(review): the guards around the remove/add calls and the assignment
 * of entity->rq are not visible in this listing.
 */
309 void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
310 struct drm_sched_rq *rq)
/* Already on the requested run queue (controlled statement not visible). */
312 if (entity->rq == rq)
/* entity->rq_lock is held across the remove and add calls below. */
315 spin_lock(&entity->rq_lock);
318 drm_sched_rq_remove_entity(entity->rq, entity);
322 drm_sched_rq_add_entity(rq, entity);
324 spin_unlock(&entity->rq_lock);
326 EXPORT_SYMBOL(drm_sched_entity_set_rq);
/*
 * Classify @fence relative to @entity using three visible tests: the fence
 * is missing or already signaled, the fence has the same context as the
 * entity, or the fence is a scheduler fence from the same scheduler.
 *
 * NOTE(review): the value returned on each branch is not visible in this
 * listing, so the meaning of true and false cannot be stated from here.
 */
328 bool drm_sched_dependency_optimized(struct dma_fence* fence,
329 struct drm_sched_entity *entity)
331 struct drm_gpu_scheduler *sched = entity->sched;
332 struct drm_sched_fence *s_fence;
334 if (!fence || dma_fence_is_signaled(fence))
336 if (fence->context == entity->fence_context)
/* s_fence may be NULL, hence the check in the test below. */
338 s_fence = to_drm_sched_fence(fence);
339 if (s_fence && s_fence->sched == sched)
344 EXPORT_SYMBOL(drm_sched_dependency_optimized);
/*
 * Install a callback on entity->dependency. Three cases are visible:
 *  - the fence belongs to this same entity: its reference is dropped;
 *  - the fence comes from the same scheduler: it is replaced by its
 *    scheduled fence and drm_sched_entity_clear_dep is installed;
 *  - any other fence: drm_sched_entity_wakeup is installed.
 * In the last two cases the fence reference is dropped when the callback
 * could not be added.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
346 static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
348 struct drm_gpu_scheduler *sched = entity->sched;
349 struct dma_fence * fence = entity->dependency;
350 struct drm_sched_fence *s_fence;
/* Both context numbers reserved for this entity are checked. */
352 if (fence->context == entity->fence_context ||
353 fence->context == entity->fence_context + 1) {
355 * Fence is a scheduled/finished fence from a job
356 * which belongs to the same entity, we can ignore
357 * fences from ourself
359 dma_fence_put(entity->dependency);
/* s_fence may be NULL, hence the check in the test below. */
363 s_fence = to_drm_sched_fence(fence);
364 if (s_fence && s_fence->sched == sched) {
367 * Fence is from the same scheduler, only need to wait for
/* Swap the held reference: take the scheduled fence, drop the original. */
370 fence = dma_fence_get(&s_fence->scheduled);
371 dma_fence_put(entity->dependency);
372 entity->dependency = fence;
/* A zero result means the callback was added. */
373 if (!dma_fence_add_callback(fence, &entity->cb,
374 drm_sched_entity_clear_dep))
377 /* Ignore it when it is already scheduled */
378 dma_fence_put(fence);
/* Fence from elsewhere: the entity must be woken when it signals. */
382 if (!dma_fence_add_callback(entity->dependency, &entity->cb,
383 drm_sched_entity_wakeup))
/* The callback could not be added, so the reference is dropped here. */
386 dma_fence_put(entity->dependency);
/*
 * Take the next job of @entity off its queue, unless a dependency still has
 * to be waited for.
 *
 * NOTE(review): the handling of an empty queue, the statement controlled by
 * the dependency callback test and the return statements are not visible in
 * this listing.
 */
390 static struct drm_sched_job *
391 drm_sched_entity_pop_job(struct drm_sched_entity *entity)
393 struct drm_gpu_scheduler *sched = entity->sched;
/* Only peek for now; the job is popped at the end of the function. */
394 struct drm_sched_job *sched_job = to_drm_sched_job(
395 spsc_queue_peek(&entity->job_queue));
/*
 * Ask the backend for dependencies one at a time until it reports none,
 * trying to install a callback for each of them.
 */
400 while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
401 if (drm_sched_entity_add_dependency_cb(entity))
404 /* skip jobs from entity that marked guilty */
405 if (entity->guilty && atomic_read(entity->guilty))
406 dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
/* Replace the last_scheduled reference with the finished fence of this job. */
408 dma_fence_put(entity->last_scheduled);
409 entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
411 spsc_queue_pop(&entity->job_queue);
416 * Submit a job to the job queue
418 * @sched_job The pointer to job required to submit
420 * Note: To guarantee that the order of insertion to queue matches
421 * the job's fence sequence number this function should be
422 * called with drm_sched_job_init under common lock.
424 * This function is declared void; it has no return value.
/*
 * NOTE(review): the declaration of first and the test that guards the
 * run queue insertion below are not visible in this listing.
 */
426 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
427 struct drm_sched_entity *entity)
429 struct drm_gpu_scheduler *sched = sched_job->sched;
432 trace_drm_sched_job(sched_job, entity);
/* The result of the push is kept in first for the wakeup decision below. */
434 first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
436 /* first job wakes up scheduler */
438 /* Add the entity to the run queue */
439 spin_lock(&entity->rq_lock);
440 drm_sched_rq_add_entity(entity->rq, entity);
441 spin_unlock(&entity->rq_lock);
442 drm_sched_wakeup(sched);
445 EXPORT_SYMBOL(drm_sched_entity_push_job);
447 /* job_finish is called after hw fence signaled
449 static void drm_sched_job_finish(struct work_struct *work)
/*
 * Work handler registered for job->finish_work in drm_sched_job_init().
 * NOTE(review): the member name passed to container_of() is on a line that
 * is not visible in this listing.
 */
451 struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
453 struct drm_gpu_scheduler *sched = s_job->sched;
455 /* remove job from ring_mirror_list */
456 spin_lock(&sched->job_list_lock);
457 list_del_init(&s_job->node);
/* Timeout handling is skipped when the timeout is MAX_SCHEDULE_TIMEOUT. */
458 if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
459 struct drm_sched_job *next;
/*
 * The list lock is released around the synchronous cancel of the timeout
 * work of this job and taken again afterwards (presumably because the
 * cancel may sleep - confirm).
 */
461 spin_unlock(&sched->job_list_lock);
462 cancel_delayed_work_sync(&s_job->work_tdr);
463 spin_lock(&sched->job_list_lock);
465 /* queue TDR for next job */
466 next = list_first_entry_or_null(&sched->ring_mirror_list,
467 struct drm_sched_job, node);
470 schedule_delayed_work(&next->work_tdr, sched->timeout);
472 spin_unlock(&sched->job_list_lock);
/* Drop a reference on the finished fence and let the backend free the job. */
473 dma_fence_put(&s_job->s_fence->finished);
474 sched->ops->free_job(s_job);
/*
 * Fence callback added to the finished fence of a job in
 * drm_sched_job_begin(). It only queues job->finish_work, so the actual
 * cleanup runs in drm_sched_job_finish() from a work item.
 *
 * NOTE(review): the member name passed to container_of() is on a line that
 * is not visible in this listing.
 */
477 static void drm_sched_job_finish_cb(struct dma_fence *f,
478 struct dma_fence_cb *cb)
480 struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
482 schedule_work(&job->finish_work);
/*
 * Bookkeeping done by the scheduler thread right before a job is handed to
 * the backend: hook up the finish callback, track the job on the
 * ring_mirror_list and start the timeout work if needed.
 */
485 static void drm_sched_job_begin(struct drm_sched_job *s_job)
487 struct drm_gpu_scheduler *sched = s_job->sched;
489 dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
490 drm_sched_job_finish_cb);
492 spin_lock(&sched->job_list_lock);
493 list_add_tail(&s_job->node, &sched->ring_mirror_list);
/*
 * The timeout work is only started when timeouts are enabled and this job
 * is the first entry of the list.
 */
494 if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
495 list_first_entry_or_null(&sched->ring_mirror_list,
496 struct drm_sched_job, node) == s_job)
497 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
498 spin_unlock(&sched->job_list_lock);
/*
 * Delayed work handler registered for job->work_tdr in drm_sched_job_init().
 * It forwards the timeout to the timedout_job hook of the backend.
 *
 * NOTE(review): the member name passed to container_of() is on a line that
 * is not visible in this listing.
 */
501 static void drm_sched_job_timedout(struct work_struct *work)
503 struct drm_sched_job *job = container_of(work, struct drm_sched_job,
506 job->sched->ops->timedout_job(job);
/*
 * Visible steps:
 *  1. Under job_list_lock, walk ring_mirror_list in reverse. For every job
 *     whose parent fence callback could be removed, drop the parent fence
 *     reference, clear the pointer and decrement hw_rq_count.
 *  2. If @bad is given and is not of kernel priority, increment its karma
 *     and search the run queues below DRM_SCHED_PRIORITY_KERNEL for the
 *     entity whose fence_context matches the scheduled fence context of
 *     @bad. When the karma exceeds the hang limit, the guilty flag of that
 *     entity is set to 1.
 *
 * NOTE(review): the declaration of i, the statements that end the entity
 * search and the closing braces are not visible in this listing.
 */
509 void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
511 struct drm_sched_job *s_job;
512 struct drm_sched_entity *entity, *tmp;
515 spin_lock(&sched->job_list_lock);
516 list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
517 if (s_job->s_fence->parent &&
518 dma_fence_remove_callback(s_job->s_fence->parent,
519 &s_job->s_fence->cb)) {
520 dma_fence_put(s_job->s_fence->parent);
521 s_job->s_fence->parent = NULL;
522 atomic_dec(&sched->hw_rq_count);
525 spin_unlock(&sched->job_list_lock);
527 if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
528 atomic_inc(&bad->karma);
529 /* don't increase @bad's karma if it's from KERNEL RQ,
530 * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs)
531 * corrupt but keep in mind that kernel jobs always considered good.
533 for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++ ) {
534 struct drm_sched_rq *rq = &sched->sched_rq[i];
536 spin_lock(&rq->lock);
537 list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
538 if (bad->s_fence->scheduled.context == entity->fence_context) {
539 if (atomic_read(&bad->karma) > bad->sched->hang_limit)
541 atomic_set(entity->guilty, 1);
545 spin_unlock(&rq->lock);
546 if (&entity->list != &rq->entities)
551 EXPORT_SYMBOL(drm_sched_hw_job_reset);
/*
 * Resubmit every job on ring_mirror_list to the backend.
 *
 * NOTE(review): the declaration of r, the statement that sets found_guilty,
 * the branch conditions around the callback handling and the closing braces
 * are not visible in this listing.
 */
553 void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
555 struct drm_sched_job *s_job, *tmp;
556 bool found_guilty = false;
559 spin_lock(&sched->job_list_lock);
/* Restart the timeout work for the first job on the list, if any. */
560 s_job = list_first_entry_or_null(&sched->ring_mirror_list,
561 struct drm_sched_job, node);
562 if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
563 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
565 list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
566 struct drm_sched_fence *s_fence = s_job->s_fence;
567 struct dma_fence *fence;
568 uint64_t guilty_context;
/* The first job whose karma exceeds the hang limit defines the guilty context. */
570 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
572 guilty_context = s_job->s_fence->scheduled.context;
/* Jobs from the guilty context get -ECANCELED set on their finished fence. */
575 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
576 dma_fence_set_error(&s_fence->finished, -ECANCELED);
/* job_list_lock is released around the backend call and retaken below. */
578 spin_unlock(&sched->job_list_lock);
579 fence = sched->ops->run_job(s_job);
580 atomic_inc(&sched->hw_rq_count);
/* Keep the fence returned by the backend as parent and watch it. */
583 s_fence->parent = dma_fence_get(fence);
584 r = dma_fence_add_callback(fence, &s_fence->cb,
585 drm_sched_process_job);
587 drm_sched_process_job(fence, &s_fence->cb);
589 DRM_ERROR("fence add callback failed (%d)\n",
591 dma_fence_put(fence);
/* Completion is also processed directly with a NULL fence on one branch. */
593 drm_sched_process_job(NULL, &s_fence->cb);
595 spin_lock(&sched->job_list_lock);
597 spin_unlock(&sched->job_list_lock);
599 EXPORT_SYMBOL(drm_sched_job_recovery);
602 * Init a sched_job with basic field
604 * Note: Refer to drm_sched_entity_push_job documentation
605 * for locking considerations.
/*
 * NOTE(review): the end of the parameter list (including the owner argument
 * used below), the assignment of job->sched, the error check after the fence
 * creation and the return statements are not visible in this listing.
 */
607 int drm_sched_job_init(struct drm_sched_job *job,
608 struct drm_gpu_scheduler *sched,
609 struct drm_sched_entity *entity,
613 job->entity = entity;
/* The priority is the index of the run queue within sched->sched_rq. */
614 job->s_priority = entity->rq - sched->sched_rq;
615 job->s_fence = drm_sched_fence_create(entity, owner);
/* Each job gets the next value of the per scheduler job counter as id. */
618 job->id = atomic64_inc_return(&sched->job_id_count);
/* finish_work runs drm_sched_job_finish, work_tdr runs drm_sched_job_timedout. */
620 INIT_WORK(&job->finish_work, drm_sched_job_finish);
621 INIT_LIST_HEAD(&job->node);
622 INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
626 EXPORT_SYMBOL(drm_sched_job_init);
629 * Return true if we can push more jobs to the hw.
631 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
/*
 * hw_rq_count is incremented in this file before a job is run and
 * decremented in drm_sched_process_job(); hw_submission_limit is the value
 * passed to drm_sched_init().
 */
633 return atomic_read(&sched->hw_rq_count) <
634 sched->hw_submission_limit;
638 * Wake up the scheduler when it is ready
640 static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
/* The worker is only woken while the scheduler can accept more jobs. */
642 if (drm_sched_ready(sched))
643 wake_up_interruptible(&sched->wake_up_worker);
647 * Select next entity to process
/*
 * NOTE(review): the declaration of i, the statement controlled by the ready
 * test, the loop exit and the return statements are not visible in this
 * listing.
 */
649 static struct drm_sched_entity *
650 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
652 struct drm_sched_entity *entity;
/* Nothing is selected while the hardware submission limit is reached. */
655 if (!drm_sched_ready(sched))
658 /* Kernel run queue has higher priority than normal run queue*/
/* Run queues are visited from the highest index down to the lowest. */
659 for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
660 entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
/*
 * Completion handler for a job. It is installed as callback on the fence
 * returned by run_job and is also called directly from this file, in some
 * cases with a NULL fence.
 */
668 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
/* The callback structure is embedded in the scheduler fence as its cb member. */
670 struct drm_sched_fence *s_fence =
671 container_of(cb, struct drm_sched_fence, cb);
672 struct drm_gpu_scheduler *sched = s_fence->sched;
/* A reference on the finished fence is held until the put further down. */
674 dma_fence_get(&s_fence->finished);
/* One submission slot becomes free again. */
675 atomic_dec(&sched->hw_rq_count);
676 drm_sched_fence_finished(s_fence);
678 trace_drm_sched_process_job(s_fence);
679 dma_fence_put(&s_fence->finished);
/* Let the scheduler thread look for more work. */
680 wake_up_interruptible(&sched->wake_up_worker);
/*
 * Used in the wait condition of drm_sched_main().
 *
 * NOTE(review): only the kthread_should_park() test is visible in this
 * listing; the body of the branch and the return statements are not.
 */
683 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
685 if (kthread_should_park()) {
/*
 * Body of the scheduler kernel thread started by drm_sched_init(). Each loop
 * iteration waits for a selectable entity, pops one job from it, runs the
 * job through the backend and wires up completion handling.
 *
 * NOTE(review): the declaration of r, the checks after the wait and after
 * the pop, the branch conditions around the callback handling and the
 * return statement are not visible in this listing.
 */
693 static int drm_sched_main(void *param)
695 struct sched_param sparam = {.sched_priority = 1};
696 struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
/* The thread runs under SCHED_FIFO with priority 1. */
699 sched_setscheduler(current, SCHED_FIFO, &sparam);
701 while (!kthread_should_stop()) {
702 struct drm_sched_entity *entity = NULL;
703 struct drm_sched_fence *s_fence;
704 struct drm_sched_job *sched_job;
705 struct dma_fence *fence;
/*
 * Sleep until the scheduler is not blocked and an entity could be
 * selected, or until the thread is asked to stop. The selected entity
 * is assigned inside the wait condition.
 */
707 wait_event_interruptible(sched->wake_up_worker,
708 (!drm_sched_blocked(sched) &&
709 (entity = drm_sched_select_entity(sched))) ||
710 kthread_should_stop());
715 sched_job = drm_sched_entity_pop_job(entity);
719 s_fence = sched_job->s_fence;
/* The submission slot is accounted for before the backend runs the job. */
721 atomic_inc(&sched->hw_rq_count);
722 drm_sched_job_begin(sched_job);
724 fence = sched->ops->run_job(sched_job);
725 drm_sched_fence_scheduled(s_fence);
/* Keep the fence returned by the backend as parent and watch it. */
728 s_fence->parent = dma_fence_get(fence);
729 r = dma_fence_add_callback(fence, &s_fence->cb,
730 drm_sched_process_job);
732 drm_sched_process_job(fence, &s_fence->cb);
734 DRM_ERROR("fence add callback failed (%d)\n",
736 dma_fence_put(fence);
/* Completion is also processed directly with a NULL fence on one branch. */
738 drm_sched_process_job(NULL, &s_fence->cb);
/* Wake waiters on job_scheduled, e.g. drm_sched_entity_do_release(). */
741 wake_up(&sched->job_scheduled);
747 * Init a gpu scheduler instance
749 * @sched The pointer to the scheduler
750 * @ops The backend operations for this scheduler.
751 * @hw_submissions Number of hw submissions to do.
752 * @name Name used for debugging
754 * Return 0 on success, otherwise error code.
/*
 * NOTE(review): the end of the parameter list (hang_limit, timeout and name
 * are used below), the declaration of i, the assignments of sched->ops and
 * sched->name and the final return are not visible in this listing.
 */
756 int drm_sched_init(struct drm_gpu_scheduler *sched,
757 const struct drm_sched_backend_ops *ops,
758 unsigned hw_submission,
765 sched->hw_submission_limit = hw_submission;
767 sched->timeout = timeout;
768 sched->hang_limit = hang_limit;
/* One run queue per priority level. */
769 for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
770 drm_sched_rq_init(&sched->sched_rq[i]);
/* wake_up_worker wakes the scheduler thread; job_scheduled wakes waiters on it. */
772 init_waitqueue_head(&sched->wake_up_worker);
773 init_waitqueue_head(&sched->job_scheduled);
774 INIT_LIST_HEAD(&sched->ring_mirror_list);
775 spin_lock_init(&sched->job_list_lock);
776 atomic_set(&sched->hw_rq_count, 0);
777 atomic64_set(&sched->job_id_count, 0);
779 /* Each scheduler will run on a separate kernel thread */
780 sched->thread = kthread_run(drm_sched_main, sched, sched->name);
/* A failed thread creation is logged and its error code is returned. */
781 if (IS_ERR(sched->thread)) {
782 DRM_ERROR("Failed to create scheduler for %s.\n", name);
783 return PTR_ERR(sched->thread);
788 EXPORT_SYMBOL(drm_sched_init);
791 * Destroy a gpu scheduler
793 * @sched The pointer to the scheduler
795 void drm_sched_fini(struct drm_gpu_scheduler *sched)
/*
 * Stop the scheduler thread created in drm_sched_init(). NOTE(review): any
 * guard around this call is not visible in this listing.
 */
798 kthread_stop(sched->thread);
800 EXPORT_SYMBOL(drm_sched_fini);