/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>
#include <drm/drmP.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/* Initialize a given run queue struct */
static void drm_sched_rq_init(struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
}

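/*
 * Add an entity to the run queue's entity list, unless it is already queued.
 */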
static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
				    struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;

	spin_lock(&rq->lock);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

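/*
 * Remove an entity from the run queue's entity list and clear it as the
 * current entity if necessary.
 */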
static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				       struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;

	spin_lock(&rq->lock);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * Select an entity which could provide a job to run
 *
 * @rq		The run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);
	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {
		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}
	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * Init a context entity used by the scheduler when submitting to a HW ring.
 *
 * @sched	The pointer to the scheduler
 * @entity	The pointer to a valid drm_sched_entity
 * @rq		The run queue this entity belongs to
 * @guilty	atomic_t set to 1 when a job on this queue
 *		is found to be guilty causing a timeout
 *
 * Return 0 on success, negative error code on failure.
 */
int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
			  struct drm_sched_entity *entity,
			  struct drm_sched_rq *rq,
			  atomic_t *guilty)
{
	if (!(sched && entity && rq))
		return -EINVAL;

	memset(entity, 0, sizeof(struct drm_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = rq;
	entity->sched = sched;
	entity->guilty = guilty;
	entity->fini_status = 0;
	entity->last_scheduled = NULL;

	spin_lock_init(&entity->rq_lock);
	spsc_queue_init(&entity->job_queue);

	atomic_set(&entity->fence_seq, 0);
	entity->fence_context = dma_fence_context_alloc(2);

	return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);

/**
 * Query if entity is initialized
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * Return true if entity is initialized, false otherwise.
 */
static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
					    struct drm_sched_entity *entity)
{
	return entity->sched == sched &&
		entity->rq != NULL;
}

/**
 * Check if entity is idle
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Return true if the entity doesn't have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
	rmb();
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return true;

	return false;
}

/**
 * Check if entity is ready
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Return true if the entity could provide a job.
 */
static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return false;

	if (READ_ONCE(entity->dependency))
		return false;

	return true;
}

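/*
 * Callback run for each job still queued on an entity when it is torn down:
 * signal the job's finished fence and free the job.
 */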
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
					  struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	drm_sched_fence_finished(job->s_fence);
	WARN_ON(job->s_fence->parent);
	dma_fence_put(&job->s_fence->finished);
	job->sched->ops->free_job(job);
}

/**
 * Destroy a context entity
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * drm_sched_entity_fini() is split into two functions. This first one does
 * the waiting, removes the entity from the runqueue and returns an error
 * when the process was killed.
 */
void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
				 struct drm_sched_entity *entity)
{
	if (!drm_sched_entity_is_initialized(sched, entity))
		return;

	/*
	 * The client will not queue more IBs during this fini, consume
	 * existing queued IBs or discard them on SIGKILL.
	 */
	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
		entity->fini_status = -ERESTARTSYS;
	else
		entity->fini_status = wait_event_killable(sched->job_scheduled,
					drm_sched_entity_is_idle(entity));
	drm_sched_entity_set_rq(entity, NULL);
}
EXPORT_SYMBOL(drm_sched_entity_do_release);

/**
 * Destroy a context entity
 *
 * @sched	Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * This second one then goes over the entity and signals all remaining jobs
 * with an error code.
 */
void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
			      struct drm_sched_entity *entity)
{
	if (entity->fini_status) {
		struct drm_sched_job *job;
		int r;

		/* Park the scheduler thread for a moment to make sure it
		 * isn't processing our entity.
		 */
		kthread_park(sched->thread);
		kthread_unpark(sched->thread);
		if (entity->dependency) {
			dma_fence_remove_callback(entity->dependency,
						  &entity->cb);
			dma_fence_put(entity->dependency);
			entity->dependency = NULL;
		}

		while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
			struct drm_sched_fence *s_fence = job->s_fence;

			drm_sched_fence_scheduled(s_fence);
			dma_fence_set_error(&s_fence->finished, -ESRCH);
			r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb,
						   drm_sched_entity_kill_jobs_cb);
			if (r == -ENOENT)
				drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n", r);
		}
	}

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = NULL;
}
EXPORT_SYMBOL(drm_sched_entity_cleanup);

void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
			   struct drm_sched_entity *entity)
{
	drm_sched_entity_do_release(sched, entity);
	drm_sched_entity_cleanup(sched, entity);
}
EXPORT_SYMBOL(drm_sched_entity_fini);

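/*
 * Dependency fence callback: clear the entity's dependency and wake up the
 * scheduler so the now-ready entity can be selected again.
 */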
static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
	drm_sched_wakeup(entity->sched);
}

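/*
 * Dependency fence callback for fences from this scheduler: only clear the
 * dependency, no wakeup is necessary.
 */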
static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
}

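/*
 * Move an entity to a different run queue, or remove it from scheduling
 * entirely when @rq is NULL.
 */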
void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
			     struct drm_sched_rq *rq)
{
	if (entity->rq == rq)
		return;

	spin_lock(&entity->rq_lock);
	if (entity->rq)
		drm_sched_rq_remove_entity(entity->rq, entity);
	entity->rq = rq;
	if (rq)
		drm_sched_rq_add_entity(rq, entity);
	spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_rq);

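/*
 * Return true when @fence is an unsignaled fence that comes from the same
 * entity or from this scheduler, i.e. waiting for it can be optimized.
 */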
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

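/*
 * Install a callback on the entity's current dependency fence. Returns true
 * if the callback was installed and the entity has to wait, false if the
 * dependency could be resolved immediately.
 */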
static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct dma_fence *fence = entity->dependency;
	struct drm_sched_fence *s_fence;

	if (fence->context == entity->fence_context) {
		/* We can ignore fences from ourself */
		dma_fence_put(entity->dependency);
		return false;
	}

	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {
		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled
		 */
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
				    drm_sched_entity_wakeup))
		return true;

	dma_fence_put(entity->dependency);
	return false;
}

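/*
 * Peek at the next job on the entity's queue, resolve its dependencies and
 * pop it once it is ready to run. Returns NULL if the queue is empty or a
 * dependency still has to be waited for.
 */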
static struct drm_sched_job *
drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->sched;
	struct drm_sched_job *sched_job = to_drm_sched_job(
					spsc_queue_peek(&entity->job_queue));

	if (!sched_job)
		return NULL;

	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
		if (drm_sched_entity_add_dependency_cb(entity))
			return NULL;

	/* skip jobs from an entity that is marked guilty */
	if (entity->guilty && atomic_read(entity->guilty))
		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

	spsc_queue_pop(&entity->job_queue);
	return sched_job;
}

/**
 * Submit a job to the job queue
 *
 * @sched_job	The pointer to job required to submit
 * @entity	The entity to queue the job on
 *
 * Note: To guarantee that the order of insertion to queue matches
 * the job's fence sequence number this function should be
 * called with drm_sched_job_init under a common lock.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	bool first = false;

	trace_drm_sched_job(sched_job, entity);

	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(sched);
	}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);

/* drm_sched_job_finish is called after the hw fence signaled
 */
static void drm_sched_job_finish(struct work_struct *work)
{
	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
						   finish_work);
	struct drm_gpu_scheduler *sched = s_job->sched;

	/* remove job from ring_mirror_list */
	spin_lock(&sched->job_list_lock);
	list_del_init(&s_job->node);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
		struct drm_sched_job *next;

		spin_unlock(&sched->job_list_lock);
		cancel_delayed_work_sync(&s_job->work_tdr);
		spin_lock(&sched->job_list_lock);

		/* queue TDR for next job */
		next = list_first_entry_or_null(&sched->ring_mirror_list,
						struct drm_sched_job, node);
		if (next)
			schedule_delayed_work(&next->work_tdr, sched->timeout);
	}
	spin_unlock(&sched->job_list_lock);
	dma_fence_put(&s_job->s_fence->finished);
	sched->ops->free_job(s_job);
}

static void drm_sched_job_finish_cb(struct dma_fence *f,
				    struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	schedule_work(&job->finish_work);
}

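/*
 * Track a job handed to the hardware: hook up the finish callback, add the
 * job to the ring mirror list and arm the timeout work for the first job.
 */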
static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
			       drm_sched_job_finish_cb);

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    list_first_entry_or_null(&sched->ring_mirror_list,
				     struct drm_sched_job, node) == s_job)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
}

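/*
 * Timeout work: forward the timed out job to the driver's timedout_job hook.
 */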
static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
						 work_tdr.work);

	job->sched->ops->timedout_job(job);
}

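/*
 * Stop scheduling on @sched after a hang: detach the parent hw fences from
 * all jobs on the mirror list and, unless @bad came from the kernel run
 * queue, increase its karma and mark the owning entity guilty once the hang
 * limit is exceeded.
 */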
void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job;
	struct drm_sched_entity *entity, *tmp;
	int i;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->s_fence->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_dec(&sched->hw_rq_count);
		}
	}
	spin_unlock(&sched->job_list_lock);

	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		/* don't increase @bad's karma if it's from the KERNEL RQ,
		 * because a GPU hang can also corrupt kernel jobs (like VM
		 * updating jobs), but kernel jobs are always considered good.
		 */
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context == entity->fence_context) {
					if (atomic_read(&bad->karma) > bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_hw_job_reset);

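/*
 * Re-submit all jobs on the ring mirror list after a GPU reset. Jobs coming
 * from a context that was found guilty get their finished fence marked with
 * -ECANCELED before they are handed back to the driver.
 */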
void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	bool found_guilty = false;
	int r;

	spin_lock(&sched->job_list_lock);
	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
					 struct drm_sched_job, node);
	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;
		struct dma_fence *fence;
		uint64_t guilty_context;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		spin_unlock(&sched->job_list_lock);
		fence = sched->ops->run_job(s_job);
		atomic_inc(&sched->hw_rq_count);

		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}
		spin_lock(&sched->job_list_lock);
	}
	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_job_recovery);

/**
 * Init a sched_job with basic fields
 *
 * Note: Refer to drm_sched_entity_push_job documentation
 * for locking considerations.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_gpu_scheduler *sched,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_WORK(&job->finish_work, drm_sched_job_finish);
	INIT_LIST_HEAD(&job->node);
	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

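/*
 * Illustrative only: a rough sketch of how a driver is expected to use the
 * two-step submission API above. The driver lock and job structure names
 * (my_mutex, my_job) are hypothetical and not part of this file; the point,
 * per the drm_sched_entity_push_job note, is that job init and push happen
 * under one common lock so queue order matches fence sequence numbers.
 *
 *	mutex_lock(&my_mutex);
 *	r = drm_sched_job_init(&my_job->base, sched, entity, owner);
 *	if (!r)
 *		drm_sched_entity_push_job(&my_job->base, entity);
 *	mutex_unlock(&my_mutex);
 */
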
/**
 * Return true if we can push more jobs to the hw.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * Wake up the scheduler when it is ready
 */
static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * Select next entity to process
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

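/*
 * Hardware fence callback: one hw submission slot is free again, signal the
 * job's finished fence and kick the scheduler thread.
 */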
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_fence *s_fence =
		container_of(cb, struct drm_sched_fence, cb);
	struct drm_gpu_scheduler *sched = s_fence->sched;

	dma_fence_get(&s_fence->finished);
	atomic_dec(&sched->hw_rq_count);
	drm_sched_fence_finished(s_fence);

	trace_drm_sched_process_job(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

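/*
 * Honour a pending kthread_park() request and report whether the scheduler
 * thread was parked.
 */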
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

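/*
 * Main scheduler thread: wait until an entity is ready, pop its next job,
 * hand the job to the driver's run_job hook and register a completion
 * callback on the returned hardware fence.
 */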
static int drm_sched_main(void *param)
{
	struct sched_param sparam = {.sched_priority = 1};
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;

		wait_event_interruptible(sched->wake_up_worker,
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);
		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		if (fence) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &s_fence->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			drm_sched_process_job(NULL, &s_fence->cb);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * Init a gpu scheduler instance
 *
 * @sched		The pointer to the scheduler
 * @ops			The backend operations for this scheduler.
 * @hw_submission	Number of hw submissions to do.
 * @hang_limit		Number of hangs allowed before an entity is marked guilty.
 * @timeout		Timeout in jiffies for the job timeout handling.
 * @name		Name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
		drm_sched_rq_init(&sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return PTR_ERR(sched->thread);
	}

	return 0;
}
EXPORT_SYMBOL(drm_sched_init);

/**
 * Destroy a gpu scheduler
 *
 * @sched	The pointer to the scheduler
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);
}
EXPORT_SYMBOL(drm_sched_fini);