linux.git - Linux 6.14-rc3 - drivers/gpu/drm/scheduler/sched_main.c
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 /**
25  * DOC: Overview
26  *
27  * The GPU scheduler provides entities which allow userspace to push jobs
28  * into software queues which are then scheduled on a hardware run queue.
29  * The software queues have a priority among them. The scheduler selects entities
30  * from the run queue in FIFO order. The scheduler provides dependency handling
31  * between jobs. The driver is supposed to provide the scheduler with callback
32  * functions for backend operations, such as submitting a job to the hardware run
33  * queue and returning the dependencies of a job.
34  *
35  * The organisation of the scheduler is the following:
36  *
37  * 1. Each hw run queue has one scheduler
38  * 2. Each scheduler has multiple run queues with different priorities
39  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40  * 3. Each scheduler run queue has a queue of entities to schedule
41  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42  *    the hardware.
43  *
44  * The jobs in an entity are always scheduled in the order in which they were pushed.
45  *
46  * Note that once a job has been taken from the entity's queue and pushed to the
47  * hardware, i.e. the pending queue, the entity must not be referenced anymore
48  * through the job's entity pointer.
49  */
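/*
 * A minimal sketch of the driver-side submission flow described above. It is
 * illustrative only: struct my_job and my_submit() are hypothetical, and
 * drm_sched_entity_push_job() lives in sched_entity.c, not in this file.
 *
 *	struct my_job {
 *		struct drm_sched_job base;
 *		// driver-specific payload (command stream, etc.)
 *	};
 *
 *	static int my_submit(struct my_job *job, struct drm_sched_entity *entity,
 *			     struct dma_fence *in_fence)
 *	{
 *		int ret;
 *
 *		// Bind the job to an entity; this example uses one credit per job.
 *		ret = drm_sched_job_init(&job->base, entity, 1, NULL);
 *		if (ret)
 *			return ret;
 *
 *		// Dependencies are added before arming; @in_fence is consumed.
 *		ret = drm_sched_job_add_dependency(&job->base, in_fence);
 *		if (ret)
 *			goto err_cleanup;
 *
 *		drm_sched_job_arm(&job->base);
 *		drm_sched_entity_push_job(&job->base);
 *		return 0;
 *
 *	err_cleanup:
 *		drm_sched_job_cleanup(&job->base);
 *		return ret;
 *	}
 */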
50
51 /**
52  * DOC: Flow Control
53  *
54  * The DRM GPU scheduler provides a flow control mechanism to regulate the rate
55  * at which the jobs fetched from scheduler entities are executed.
56  *
57  * In this context the &drm_gpu_scheduler keeps track of a driver specified
58  * credit limit representing the capacity of this scheduler and a credit count;
59  * every &drm_sched_job carries a driver specified number of credits.
60  *
61  * Once a job is executed (but not yet finished), the job's credits contribute
62  * to the scheduler's credit count until the job is finished. If by executing
63  * one more job the scheduler's credit count would exceed the scheduler's
64  * credit limit, the job won't be executed. Instead, the scheduler will wait
65  * until the credit count has decreased enough to not overflow its credit limit.
66  * This implies waiting for previously executed jobs.
67  *
68  * Optionally, drivers may register a callback (update_job_credits) provided by
69  * struct drm_sched_backend_ops to update the job's credits dynamically. The
70  * scheduler executes this callback every time the scheduler considers a job for
71  * execution and subsequently checks whether the job fits the scheduler's credit
72  * limit.
73  */
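/*
 * A hedged sketch of how a driver might hook into this flow control. All
 * my_* names are hypothetical driver code; the callback slots are the ones
 * declared in struct drm_sched_backend_ops.
 *
 *	static u32 my_update_job_credits(struct drm_sched_job *sched_job)
 *	{
 *		struct my_job *job = container_of(sched_job, struct my_job, base);
 *
 *		// Re-evaluate how much ring space the job still needs.
 *		return my_ring_slots_needed(job);
 *	}
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.run_job            = my_run_job,
 *		.timedout_job       = my_timedout_job,
 *		.free_job           = my_free_job,
 *		.update_job_credits = my_update_job_credits,
 *	};
 *
 *	// At submission time the job's initial credit value is passed to
 *	// drm_sched_job_init(); it must not be zero.
 *	ret = drm_sched_job_init(&job->base, entity, my_ring_slots_needed(job), NULL);
 */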
74
75 #include <linux/wait.h>
76 #include <linux/sched.h>
77 #include <linux/completion.h>
78 #include <linux/dma-resv.h>
79 #include <uapi/linux/sched/types.h>
80
81 #include <drm/drm_print.h>
82 #include <drm/drm_gem.h>
83 #include <drm/drm_syncobj.h>
84 #include <drm/gpu_scheduler.h>
85 #include <drm/spsc_queue.h>
86
87 #define CREATE_TRACE_POINTS
88 #include "gpu_scheduler_trace.h"
89
90 #ifdef CONFIG_LOCKDEP
91 static struct lockdep_map drm_sched_lockdep_map = {
92         .name = "drm_sched_lockdep_map"
93 };
94 #endif
95
96 #define to_drm_sched_job(sched_job)             \
97                 container_of((sched_job), struct drm_sched_job, queue_node)
98
99 int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
100
101 /**
102  * DOC: sched_policy (int)
103  * Used to override the default entity scheduling policy in a run queue.
104  */
105 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
106 module_param_named(sched_policy, drm_sched_policy, int, 0444);
107
108 static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
109 {
110         u32 credits;
111
112         drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit,
113                                               atomic_read(&sched->credit_count),
114                                               &credits));
115
116         return credits;
117 }
118
119 /**
120  * drm_sched_can_queue -- Can we queue more to the hardware?
121  * @sched: scheduler instance
122  * @entity: the scheduler entity
123  *
124  * Return true if we can push at least one more job from @entity, false
125  * otherwise.
126  */
127 static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched,
128                                 struct drm_sched_entity *entity)
129 {
130         struct drm_sched_job *s_job;
131
132         s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
133         if (!s_job)
134                 return false;
135
136         if (sched->ops->update_job_credits) {
137                 s_job->credits = sched->ops->update_job_credits(s_job);
138
139                 drm_WARN(sched, !s_job->credits,
140                          "Jobs with zero credits bypass job-flow control.\n");
141         }
142
143         /* If a job exceeds the credit limit, truncate it to the credit limit
144          * itself to guarantee forward progress.
145          */
146         if (drm_WARN(sched, s_job->credits > sched->credit_limit,
147                      "Jobs may not exceed the credit limit, truncate.\n"))
148                 s_job->credits = sched->credit_limit;
149
150         return drm_sched_available_credits(sched) >= s_job->credits;
151 }
152
153 static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
154                                                             const struct rb_node *b)
155 {
156         struct drm_sched_entity *ent_a =  rb_entry((a), struct drm_sched_entity, rb_tree_node);
157         struct drm_sched_entity *ent_b =  rb_entry((b), struct drm_sched_entity, rb_tree_node);
158
159         return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
160 }
161
162 static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
163                                             struct drm_sched_rq *rq)
164 {
165         if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
166                 rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
167                 RB_CLEAR_NODE(&entity->rb_tree_node);
168         }
169 }
170
171 void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
172                                      struct drm_sched_rq *rq,
173                                      ktime_t ts)
174 {
175         /*
176          * Both locks need to be held: one protects against a concurrent
177          * drm_sched_entity_select_rq() changing entity->rq, and the other
178          * protects the rb tree structure while it is updated.
179          */
180         lockdep_assert_held(&entity->lock);
181         lockdep_assert_held(&rq->lock);
182
183         drm_sched_rq_remove_fifo_locked(entity, rq);
184
185         entity->oldest_job_waiting = ts;
186
187         rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
188                       drm_sched_entity_compare_before);
189 }
190
191 /**
192  * drm_sched_rq_init - initialize a given run queue struct
193  *
194  * @sched: scheduler instance to associate with this run queue
195  * @rq: scheduler run queue
196  *
197  * Initializes a scheduler runqueue.
198  */
199 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
200                               struct drm_sched_rq *rq)
201 {
202         spin_lock_init(&rq->lock);
203         INIT_LIST_HEAD(&rq->entities);
204         rq->rb_tree_root = RB_ROOT_CACHED;
205         rq->current_entity = NULL;
206         rq->sched = sched;
207 }
208
209 /**
210  * drm_sched_rq_add_entity - add an entity
211  *
212  * @rq: scheduler run queue
213  * @entity: scheduler entity
214  *
215  * Adds a scheduler entity to the run queue.
216  */
217 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
218                              struct drm_sched_entity *entity)
219 {
220         lockdep_assert_held(&entity->lock);
221         lockdep_assert_held(&rq->lock);
222
223         if (!list_empty(&entity->list))
224                 return;
225
226         atomic_inc(rq->sched->score);
227         list_add_tail(&entity->list, &rq->entities);
228 }
229
230 /**
231  * drm_sched_rq_remove_entity - remove an entity
232  *
233  * @rq: scheduler run queue
234  * @entity: scheduler entity
235  *
236  * Removes a scheduler entity from the run queue.
237  */
238 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
239                                 struct drm_sched_entity *entity)
240 {
241         lockdep_assert_held(&entity->lock);
242
243         if (list_empty(&entity->list))
244                 return;
245
246         spin_lock(&rq->lock);
247
248         atomic_dec(rq->sched->score);
249         list_del_init(&entity->list);
250
251         if (rq->current_entity == entity)
252                 rq->current_entity = NULL;
253
254         if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
255                 drm_sched_rq_remove_fifo_locked(entity, rq);
256
257         spin_unlock(&rq->lock);
258 }
259
260 /**
261  * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
262  *
263  * @sched: the gpu scheduler
264  * @rq: scheduler run queue to check.
265  *
266  * Try to find the next ready entity.
267  *
268  * Return an entity if one is found; return an error-pointer (!NULL) if an
269  * entity was ready, but the scheduler had insufficient credits to accommodate
270  * its job; return NULL, if no ready entity was found.
271  */
272 static struct drm_sched_entity *
273 drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
274                               struct drm_sched_rq *rq)
275 {
276         struct drm_sched_entity *entity;
277
278         spin_lock(&rq->lock);
279
280         entity = rq->current_entity;
281         if (entity) {
282                 list_for_each_entry_continue(entity, &rq->entities, list) {
283                         if (drm_sched_entity_is_ready(entity)) {
284                                 /* If we can't queue yet, preserve the current
285                                  * entity in terms of fairness.
286                                  */
287                                 if (!drm_sched_can_queue(sched, entity)) {
288                                         spin_unlock(&rq->lock);
289                                         return ERR_PTR(-ENOSPC);
290                                 }
291
292                                 rq->current_entity = entity;
293                                 reinit_completion(&entity->entity_idle);
294                                 spin_unlock(&rq->lock);
295                                 return entity;
296                         }
297                 }
298         }
299
300         list_for_each_entry(entity, &rq->entities, list) {
301                 if (drm_sched_entity_is_ready(entity)) {
302                         /* If we can't queue yet, preserve the current entity in
303                          * terms of fairness.
304                          */
305                         if (!drm_sched_can_queue(sched, entity)) {
306                                 spin_unlock(&rq->lock);
307                                 return ERR_PTR(-ENOSPC);
308                         }
309
310                         rq->current_entity = entity;
311                         reinit_completion(&entity->entity_idle);
312                         spin_unlock(&rq->lock);
313                         return entity;
314                 }
315
316                 if (entity == rq->current_entity)
317                         break;
318         }
319
320         spin_unlock(&rq->lock);
321
322         return NULL;
323 }
324
325 /**
326  * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
327  *
328  * @sched: the gpu scheduler
329  * @rq: scheduler run queue to check.
330  *
331  * Find oldest waiting ready entity.
332  *
333  * Return an entity if one is found; return an error-pointer (!NULL) if an
334  * entity was ready, but the scheduler had insufficient credits to accommodate
335  * its job; return NULL, if no ready entity was found.
336  */
337 static struct drm_sched_entity *
338 drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
339                                 struct drm_sched_rq *rq)
340 {
341         struct rb_node *rb;
342
343         spin_lock(&rq->lock);
344         for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) {
345                 struct drm_sched_entity *entity;
346
347                 entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
348                 if (drm_sched_entity_is_ready(entity)) {
349                         /* If we can't queue yet, preserve the current entity in
350                          * terms of fairness.
351                          */
352                         if (!drm_sched_can_queue(sched, entity)) {
353                                 spin_unlock(&rq->lock);
354                                 return ERR_PTR(-ENOSPC);
355                         }
356
357                         reinit_completion(&entity->entity_idle);
358                         break;
359                 }
360         }
361         spin_unlock(&rq->lock);
362
363         return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
364 }
365
366 /**
367  * drm_sched_run_job_queue - enqueue run-job work
368  * @sched: scheduler instance
369  */
370 static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
371 {
372         if (!READ_ONCE(sched->pause_submit))
373                 queue_work(sched->submit_wq, &sched->work_run_job);
374 }
375
376 /**
377  * __drm_sched_run_free_queue - enqueue free-job work
378  * @sched: scheduler instance
379  */
380 static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
381 {
382         if (!READ_ONCE(sched->pause_submit))
383                 queue_work(sched->submit_wq, &sched->work_free_job);
384 }
385
386 /**
387  * drm_sched_run_free_queue - enqueue free-job work if ready
388  * @sched: scheduler instance
389  */
390 static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
391 {
392         struct drm_sched_job *job;
393
394         spin_lock(&sched->job_list_lock);
395         job = list_first_entry_or_null(&sched->pending_list,
396                                        struct drm_sched_job, list);
397         if (job && dma_fence_is_signaled(&job->s_fence->finished))
398                 __drm_sched_run_free_queue(sched);
399         spin_unlock(&sched->job_list_lock);
400 }
401
402 /**
403  * drm_sched_job_done - complete a job
404  * @s_job: pointer to the job which is done
405  * @result: the job's completion status
406  * Finish the job's fence and wake up the worker thread.
407  */
408 static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
409 {
410         struct drm_sched_fence *s_fence = s_job->s_fence;
411         struct drm_gpu_scheduler *sched = s_fence->sched;
412
413         atomic_sub(s_job->credits, &sched->credit_count);
414         atomic_dec(sched->score);
415
416         trace_drm_sched_process_job(s_fence);
417
418         dma_fence_get(&s_fence->finished);
419         drm_sched_fence_finished(s_fence, result);
420         dma_fence_put(&s_fence->finished);
421         __drm_sched_run_free_queue(sched);
422 }
423
424 /**
425  * drm_sched_job_done_cb - the callback for a done job
426  * @f: fence
427  * @cb: fence callbacks
428  */
429 static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
430 {
431         struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
432
433         drm_sched_job_done(s_job, f->error);
434 }
435
436 /**
437  * drm_sched_start_timeout - start timeout for reset worker
438  *
439  * @sched: scheduler instance to start the worker for
440  *
441  * Start the timeout for the given scheduler.
442  */
443 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
444 {
445         lockdep_assert_held(&sched->job_list_lock);
446
447         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
448             !list_empty(&sched->pending_list))
449                 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
450 }
451
452 static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
453 {
454         spin_lock(&sched->job_list_lock);
455         drm_sched_start_timeout(sched);
456         spin_unlock(&sched->job_list_lock);
457 }
458
459 /**
460  * drm_sched_tdr_queue_imm: - immediately start job timeout handler
461  *
462  * @sched: scheduler for which the timeout handling should be started.
463  *
464  * Start timeout handling immediately for the named scheduler.
465  */
466 void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched)
467 {
468         spin_lock(&sched->job_list_lock);
469         sched->timeout = 0;
470         drm_sched_start_timeout(sched);
471         spin_unlock(&sched->job_list_lock);
472 }
473 EXPORT_SYMBOL(drm_sched_tdr_queue_imm);
474
475 /**
476  * drm_sched_fault - immediately start timeout handler
477  *
478  * @sched: scheduler where the timeout handling should be started.
479  *
480  * Start timeout handling immediately when the driver detects a hardware fault.
481  */
482 void drm_sched_fault(struct drm_gpu_scheduler *sched)
483 {
484         if (sched->timeout_wq)
485                 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
486 }
487 EXPORT_SYMBOL(drm_sched_fault);
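/*
 * drm_sched_fault() is typically wired to a driver's fault interrupt; a
 * hedged sketch (struct my_ring and the IRQ wiring are hypothetical):
 *
 *	static irqreturn_t my_fault_irq(int irq, void *arg)
 *	{
 *		struct my_ring *ring = arg;
 *
 *		// Kick the timeout handler right away instead of waiting
 *		// for the TDR timer to expire.
 *		drm_sched_fault(&ring->sched);
 *		return IRQ_HANDLED;
 *	}
 */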
488
489 /**
490  * drm_sched_suspend_timeout - Suspend scheduler job timeout
491  *
492  * @sched: scheduler instance for which to suspend the timeout
493  *
494  * Suspend the delayed work timeout for the scheduler. This is done by
495  * modifying the delayed work timeout to an arbitrarily large value,
496  * MAX_SCHEDULE_TIMEOUT in this case.
497  *
498  * Returns the timeout remaining
499  *
500  */
501 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
502 {
503         unsigned long sched_timeout, now = jiffies;
504
505         sched_timeout = sched->work_tdr.timer.expires;
506
507         /*
508          * Modify the timeout to an arbitrarily large value. This also prevents
509          * the timeout from being restarted when new submissions arrive.
510          */
511         if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
512                         && time_after(sched_timeout, now))
513                 return sched_timeout - now;
514         else
515                 return sched->timeout;
516 }
517 EXPORT_SYMBOL(drm_sched_suspend_timeout);
518
519 /**
520  * drm_sched_resume_timeout - Resume scheduler job timeout
521  *
522  * @sched: scheduler instance for which to resume the timeout
523  * @remaining: remaining timeout
524  *
525  * Resume the delayed work timeout for the scheduler.
526  */
527 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
528                 unsigned long remaining)
529 {
530         spin_lock(&sched->job_list_lock);
531
532         if (list_empty(&sched->pending_list))
533                 cancel_delayed_work(&sched->work_tdr);
534         else
535                 mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
536
537         spin_unlock(&sched->job_list_lock);
538 }
539 EXPORT_SYMBOL(drm_sched_resume_timeout);
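/*
 * drm_sched_suspend_timeout() and drm_sched_resume_timeout() are meant to be
 * used as a pair; a minimal sketch, where my_do_preemption_work() stands in
 * for whatever the driver needs to do without risking a spurious TDR:
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(sched);
 *	my_do_preemption_work(sched);
 *	drm_sched_resume_timeout(sched, remaining);
 */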
540
541 static void drm_sched_job_begin(struct drm_sched_job *s_job)
542 {
543         struct drm_gpu_scheduler *sched = s_job->sched;
544
545         spin_lock(&sched->job_list_lock);
546         list_add_tail(&s_job->list, &sched->pending_list);
547         drm_sched_start_timeout(sched);
548         spin_unlock(&sched->job_list_lock);
549 }
550
551 static void drm_sched_job_timedout(struct work_struct *work)
552 {
553         struct drm_gpu_scheduler *sched;
554         struct drm_sched_job *job;
555         enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
556
557         sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
558
559         /* Protects against concurrent deletion in drm_sched_get_finished_job */
560         spin_lock(&sched->job_list_lock);
561         job = list_first_entry_or_null(&sched->pending_list,
562                                        struct drm_sched_job, list);
563
564         if (job) {
565                 /*
566                  * Remove the bad job so it cannot be freed by a concurrent
567                  * drm_sched_get_finished_job(). It will be reinserted back once the
568                  * scheduler is stopped (see drm_sched_stop()), at which point it's safe.
569                  */
570                 list_del_init(&job->list);
571                 spin_unlock(&sched->job_list_lock);
572
573                 status = job->sched->ops->timedout_job(job);
574
575                 /*
576                  * Guilty job did complete and hence needs to be manually removed
577                  * See drm_sched_stop doc.
578                  */
579                 if (sched->free_guilty) {
580                         job->sched->ops->free_job(job);
581                         sched->free_guilty = false;
582                 }
583         } else {
584                 spin_unlock(&sched->job_list_lock);
585         }
586
587         if (status != DRM_GPU_SCHED_STAT_ENODEV)
588                 drm_sched_start_timeout_unlocked(sched);
589 }
590
591 /**
592  * drm_sched_stop - stop the scheduler
593  *
594  * @sched: scheduler instance
595  * @bad: job which caused the time out
596  *
597  * Stop the scheduler, and also remove and free all completed jobs.
598  * Note: the bad job will not be freed, as it might be used later, so it is the
599  * caller's responsibility to release it manually if it is not part of the
600  * pending list any more.
601  *
602  * This function is typically used for reset recovery (see the documentation of
603  * drm_sched_backend_ops.timedout_job() for details). Do not call it for
604  * scheduler teardown, i.e., before calling drm_sched_fini().
605  */
606 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
607 {
608         struct drm_sched_job *s_job, *tmp;
609
610         drm_sched_wqueue_stop(sched);
611
612         /*
613          * Reinsert back the bad job here - now it's safe as
614          * drm_sched_get_finished_job cannot race against us and release the
615          * bad job at this point - we parked (waited for) any in progress
616          * (earlier) cleanups and drm_sched_get_finished_job will not be called
617          * now until the scheduler is restarted.
618          */
619         if (bad && bad->sched == sched)
620                 /*
621                  * Add at the head of the queue to reflect it was the earliest
622                  * job extracted.
623                  */
624                 list_add(&bad->list, &sched->pending_list);
625
626         /*
627          * Iterate the job list from later to earlier and either deactivate
628          * the jobs' HW callbacks or remove them from the pending list if they
629          * have already signaled.
630          * This iteration is thread safe as the scheduler is stopped.
631          */
632         list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
633                                          list) {
634                 if (s_job->s_fence->parent &&
635                     dma_fence_remove_callback(s_job->s_fence->parent,
636                                               &s_job->cb)) {
637                         dma_fence_put(s_job->s_fence->parent);
638                         s_job->s_fence->parent = NULL;
639                         atomic_sub(s_job->credits, &sched->credit_count);
640                 } else {
641                         /*
642                          * remove job from pending_list.
643                          * Locking here is for concurrent resume timeout
644                          */
645                         spin_lock(&sched->job_list_lock);
646                         list_del_init(&s_job->list);
647                         spin_unlock(&sched->job_list_lock);
648
649                         /*
650                          * Wait for job's HW fence callback to finish using s_job
651                          * before releasing it.
652                          *
653                          * Job is still alive so fence refcount at least 1
654                          */
655                         dma_fence_wait(&s_job->s_fence->finished, false);
656
657                         /*
658                          * We must keep bad job alive for later use during
659                          * recovery by some of the drivers but leave a hint
660                          * that the guilty job must be released.
661                          */
662                         if (bad != s_job)
663                                 sched->ops->free_job(s_job);
664                         else
665                                 sched->free_guilty = true;
666                 }
667         }
668
669         /*
670          * Stop the pending timer in flight as we rearm it in drm_sched_start. This
671          * prevents the pending timeout work in progress from firing right away after
672          * this TDR finishes and before the newly restarted jobs have had a
673          * chance to complete.
674          */
675         cancel_delayed_work(&sched->work_tdr);
676 }
677 EXPORT_SYMBOL(drm_sched_stop);
678
679 /**
680  * drm_sched_start - recover jobs after a reset
681  *
682  * @sched: scheduler instance
683  * @errno: error to set on the pending fences
684  *
685  * This function is typically used for reset recovery (see the documentation of
686  * drm_sched_backend_ops.timedout_job() for details). Do not call it for
687  * scheduler startup. The scheduler itself is fully operational after
688  * drm_sched_init() succeeded.
689  */
690 void drm_sched_start(struct drm_gpu_scheduler *sched, int errno)
691 {
692         struct drm_sched_job *s_job, *tmp;
693
694         /*
695          * Locking the list is not required here as submission is stopped,
696          * so no new jobs are being inserted or removed. Also, concurrent
697          * GPU recoveries can't run in parallel.
698          */
699         list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
700                 struct dma_fence *fence = s_job->s_fence->parent;
701
702                 atomic_add(s_job->credits, &sched->credit_count);
703
704                 if (!fence) {
705                         drm_sched_job_done(s_job, errno ?: -ECANCELED);
706                         continue;
707                 }
708
709                 if (dma_fence_add_callback(fence, &s_job->cb,
710                                            drm_sched_job_done_cb))
711                         drm_sched_job_done(s_job, fence->error ?: errno);
712         }
713
714         drm_sched_start_timeout_unlocked(sched);
715         drm_sched_wqueue_start(sched);
716 }
717 EXPORT_SYMBOL(drm_sched_start);
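/*
 * A hedged sketch of the reset-recovery flow that drm_sched_stop() and
 * drm_sched_start() are meant for, as seen from a driver's timedout_job()
 * callback; my_hw_reset() is a placeholder for the driver-specific reset:
 *
 *	static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *job)
 *	{
 *		struct drm_gpu_scheduler *sched = job->sched;
 *
 *		drm_sched_stop(sched, job);	// park submission, detach HW callbacks
 *		my_hw_reset(job);		// driver-specific engine/GPU reset
 *		drm_sched_start(sched, 0);	// recover the remaining pending jobs
 *
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */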
718
719 /**
720  * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
721  *
722  * @sched: scheduler instance
723  *
724  * Re-submitting jobs was a concept AMD came up with as a cheap way to implement
725  * recovery after a job timeout.
726  *
727  * This turned out not to work very well. First of all, there are many
728  * problems with the dma_fence implementation and requirements. Either the
729  * implementation risks deadlocks with core memory management or violates
730  * documented implementation details of the dma_fence object.
731  *
732  * Drivers can still save and restore their state for recovery operations, but
733  * we shouldn't make this a general scheduler feature around the dma_fence
734  * interface.
735  */
736 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
737 {
738         struct drm_sched_job *s_job, *tmp;
739         uint64_t guilty_context;
740         bool found_guilty = false;
741         struct dma_fence *fence;
742
743         list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
744                 struct drm_sched_fence *s_fence = s_job->s_fence;
745
746                 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
747                         found_guilty = true;
748                         guilty_context = s_job->s_fence->scheduled.context;
749                 }
750
751                 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
752                         dma_fence_set_error(&s_fence->finished, -ECANCELED);
753
754                 fence = sched->ops->run_job(s_job);
755
756                 if (IS_ERR_OR_NULL(fence)) {
757                         if (IS_ERR(fence))
758                                 dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
759
760                         s_job->s_fence->parent = NULL;
761                 } else {
762
763                         s_job->s_fence->parent = dma_fence_get(fence);
764
765                         /* Drop for original kref_init */
766                         dma_fence_put(fence);
767                 }
768         }
769 }
770 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
771
772 /**
773  * drm_sched_job_init - init a scheduler job
774  * @job: scheduler job to init
775  * @entity: scheduler entity to use
776  * @credits: the number of credits this job contributes to the scheduler's
777  * credit limit
778  * @owner: job owner for debugging
779  *
780  * Refer to drm_sched_entity_push_job() documentation
781  * for locking considerations.
782  *
783  * Drivers must make sure to call drm_sched_job_cleanup() if this function returns
784  * successfully, even when @job is aborted before drm_sched_job_arm() is called.
785  *
786  * Note that this function does not assign a valid value to each struct member
787  * of struct drm_sched_job. Take a look at that struct's documentation to see
788  * who sets which struct member with what lifetime.
789  *
790  * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
791  * has died, which can mean that there's no valid runqueue for an @entity.
792  * This function returns -ENOENT in this case (which probably should be -EIO as
793  * a more meaningful return value).
794  *
795  * Returns 0 for success, negative error code otherwise.
796  */
797 int drm_sched_job_init(struct drm_sched_job *job,
798                        struct drm_sched_entity *entity,
799                        u32 credits, void *owner)
800 {
801         if (!entity->rq) {
802                 /* This will most likely be followed by missing frames
803                  * or worse--a blank screen--so leave a trail in the
804                  * logs so that this can be debugged more easily.
805                  */
806                 drm_err(job->sched, "%s: entity has no rq!\n", __func__);
807                 return -ENOENT;
808         }
809
810         if (unlikely(!credits)) {
811                 pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
812                 return -EINVAL;
813         }
814
815         /*
816          * We don't know for sure how the user has allocated the job. Thus, zero the
817          * struct so that disallowed (i.e., too early) usage of pointers that
818          * this function does not set is guaranteed to lead to a NULL pointer
819          * exception instead of UB.
820          */
821         memset(job, 0, sizeof(*job));
822
823         job->entity = entity;
824         job->credits = credits;
825         job->s_fence = drm_sched_fence_alloc(entity, owner);
826         if (!job->s_fence)
827                 return -ENOMEM;
828
829         INIT_LIST_HEAD(&job->list);
830
831         xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
832
833         return 0;
834 }
835 EXPORT_SYMBOL(drm_sched_job_init);
836
837 /**
838  * drm_sched_job_arm - arm a scheduler job for execution
839  * @job: scheduler job to arm
840  *
841  * This arms a scheduler job for execution. Specifically it initializes the
842  * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
843  * or other places that need to track the completion of this job.
844  *
845  * Refer to drm_sched_entity_push_job() documentation for locking
846  * considerations.
847  *
848  * This can only be called if drm_sched_job_init() succeeded.
849  */
850 void drm_sched_job_arm(struct drm_sched_job *job)
851 {
852         struct drm_gpu_scheduler *sched;
853         struct drm_sched_entity *entity = job->entity;
854
855         BUG_ON(!entity);
856         drm_sched_entity_select_rq(entity);
857         sched = entity->rq->sched;
858
859         job->sched = sched;
860         job->s_priority = entity->priority;
861         job->id = atomic64_inc_return(&sched->job_id_count);
862
863         drm_sched_fence_init(job->s_fence, job->entity);
864 }
865 EXPORT_SYMBOL(drm_sched_job_arm);
866
867 /**
868  * drm_sched_job_add_dependency - adds the fence as a job dependency
869  * @job: scheduler job to add the dependencies to
870  * @fence: the dma_fence to add to the list of dependencies.
871  *
872  * Note that @fence is consumed in both the success and error cases.
873  *
874  * Returns:
875  * 0 on success, or an error on failing to expand the array.
876  */
877 int drm_sched_job_add_dependency(struct drm_sched_job *job,
878                                  struct dma_fence *fence)
879 {
880         struct dma_fence *entry;
881         unsigned long index;
882         u32 id = 0;
883         int ret;
884
885         if (!fence)
886                 return 0;
887
888         /* Deduplicate if we already depend on a fence from the same context.
889          * This lets the size of the array of deps scale with the number of
890          * engines involved, rather than the number of BOs.
891          */
892         xa_for_each(&job->dependencies, index, entry) {
893                 if (entry->context != fence->context)
894                         continue;
895
896                 if (dma_fence_is_later(fence, entry)) {
897                         dma_fence_put(entry);
898                         xa_store(&job->dependencies, index, fence, GFP_KERNEL);
899                 } else {
900                         dma_fence_put(fence);
901                 }
902                 return 0;
903         }
904
905         ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
906         if (ret != 0)
907                 dma_fence_put(fence);
908
909         return ret;
910 }
911 EXPORT_SYMBOL(drm_sched_job_add_dependency);
912
913 /**
914  * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency
915  * @job: scheduler job to add the dependencies to
916  * @file: drm file private pointer
917  * @handle: syncobj handle to lookup
918  * @point: timeline point
919  *
920  * This adds the fence matching the given syncobj to @job.
921  *
922  * Returns:
923  * 0 on success, or an error on failing to expand the array.
924  */
925 int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
926                                          struct drm_file *file,
927                                          u32 handle,
928                                          u32 point)
929 {
930         struct dma_fence *fence;
931         int ret;
932
933         ret = drm_syncobj_find_fence(file, handle, point, 0, &fence);
934         if (ret)
935                 return ret;
936
937         return drm_sched_job_add_dependency(job, fence);
938 }
939 EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency);
940
941 /**
942  * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
943  * @job: scheduler job to add the dependencies to
944  * @resv: the dma_resv object to get the fences from
945  * @usage: the dma_resv_usage to use to filter the fences
946  *
947  * This adds all fences matching the given usage from @resv to @job.
948  * Must be called with the @resv lock held.
949  *
950  * Returns:
951  * 0 on success, or an error on failing to expand the array.
952  */
953 int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
954                                         struct dma_resv *resv,
955                                         enum dma_resv_usage usage)
956 {
957         struct dma_resv_iter cursor;
958         struct dma_fence *fence;
959         int ret;
960
961         dma_resv_assert_held(resv);
962
963         dma_resv_for_each_fence(&cursor, resv, usage, fence) {
964                 /* Make sure to grab an additional ref on the added fence */
965                 dma_fence_get(fence);
966                 ret = drm_sched_job_add_dependency(job, fence);
967                 if (ret) {
968                         dma_fence_put(fence);
969                         return ret;
970                 }
971         }
972         return 0;
973 }
974 EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
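/*
 * A short usage sketch, assuming the caller already holds @bo->resv (for
 * example via drm_gem_lock_reservations() or drm_exec); bo, job and
 * writes_bo are hypothetical driver-side variables:
 *
 *	// With bo->resv locked:
 *	ret = drm_sched_job_add_resv_dependencies(&job->base, bo->resv,
 *						  dma_resv_usage_rw(writes_bo));
 *	if (ret)
 *		goto err_cleanup;
 */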
975
976 /**
977  * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
978  *   dependencies
979  * @job: scheduler job to add the dependencies to
980  * @obj: the gem object to add new dependencies from.
981  * @write: whether the job might write the object (so we need to depend on
982  * shared fences in the reservation object).
983  *
984  * This should be called after drm_gem_lock_reservations() on your array of
985  * GEM objects used in the job but before updating the reservations with your
986  * own fences.
987  *
988  * Returns:
989  * 0 on success, or an error on failing to expand the array.
990  */
991 int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
992                                             struct drm_gem_object *obj,
993                                             bool write)
994 {
995         return drm_sched_job_add_resv_dependencies(job, obj->resv,
996                                                    dma_resv_usage_rw(write));
997 }
998 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
999
1000 /**
1001  * drm_sched_job_cleanup - clean up scheduler job resources
1002  * @job: scheduler job to clean up
1003  *
1004  * Cleans up the resources allocated with drm_sched_job_init().
1005  *
1006  * Drivers should call this from their error unwind code if @job is aborted
1007  * before drm_sched_job_arm() is called.
1008  *
1009  * After that point of no return @job is committed to be executed by the
1010  * scheduler, and this function should be called from the
1011  * &drm_sched_backend_ops.free_job callback.
1012  */
1013 void drm_sched_job_cleanup(struct drm_sched_job *job)
1014 {
1015         struct dma_fence *fence;
1016         unsigned long index;
1017
1018         if (kref_read(&job->s_fence->finished.refcount)) {
1019                 /* drm_sched_job_arm() has been called */
1020                 dma_fence_put(&job->s_fence->finished);
1021         } else {
1022                 /* aborted job before committing to run it */
1023                 drm_sched_fence_free(job->s_fence);
1024         }
1025
1026         job->s_fence = NULL;
1027
1028         xa_for_each(&job->dependencies, index, fence) {
1029                 dma_fence_put(fence);
1030         }
1031         xa_destroy(&job->dependencies);
1032
1033 }
1034 EXPORT_SYMBOL(drm_sched_job_cleanup);
1035
1036 /**
1037  * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
1038  * @sched: scheduler instance
1039  *
1040  * Wake up the scheduler if we can queue jobs.
1041  */
1042 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
1043 {
1044         drm_sched_run_job_queue(sched);
1045 }
1046
1047 /**
1048  * drm_sched_select_entity - Select next entity to process
1049  *
1050  * @sched: scheduler instance
1051  *
1052  * Return an entity to process or NULL if none are found.
1053  *
1054  * Note that we break out of the for-loop when "entity" is non-NULL, which can
1055  * also be an error pointer--this ensures we don't process lower-priority
1056  * run-queues. See the comments in the respective called functions.
1057  */
1058 static struct drm_sched_entity *
1059 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
1060 {
1061         struct drm_sched_entity *entity;
1062         int i;
1063
1064         /* Start with the highest priority.
1065          */
1066         for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
1067                 entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
1068                         drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
1069                         drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
1070                 if (entity)
1071                         break;
1072         }
1073
1074         return IS_ERR(entity) ? NULL : entity;
1075 }
1076
1077 /**
1078  * drm_sched_get_finished_job - fetch the next finished job to be destroyed
1079  *
1080  * @sched: scheduler instance
1081  *
1082  * Returns the next finished job from the pending list (if there is one)
1083  * that is ready to be destroyed.
1084  */
1085 static struct drm_sched_job *
1086 drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
1087 {
1088         struct drm_sched_job *job, *next;
1089
1090         spin_lock(&sched->job_list_lock);
1091
1092         job = list_first_entry_or_null(&sched->pending_list,
1093                                        struct drm_sched_job, list);
1094
1095         if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
1096                 /* remove job from pending_list */
1097                 list_del_init(&job->list);
1098
1099                 /* cancel this job's TO timer */
1100                 cancel_delayed_work(&sched->work_tdr);
1101                 /* make the scheduled timestamp more accurate */
1102                 next = list_first_entry_or_null(&sched->pending_list,
1103                                                 typeof(*next), list);
1104
1105                 if (next) {
1106                         if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
1107                                      &next->s_fence->scheduled.flags))
1108                                 next->s_fence->scheduled.timestamp =
1109                                         dma_fence_timestamp(&job->s_fence->finished);
1110                         /* start TO timer for next job */
1111                         drm_sched_start_timeout(sched);
1112                 }
1113         } else {
1114                 job = NULL;
1115         }
1116
1117         spin_unlock(&sched->job_list_lock);
1118
1119         return job;
1120 }
1121
1122 /**
1123  * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
1124  * @sched_list: list of drm_gpu_schedulers
1125  * @num_sched_list: number of drm_gpu_schedulers in the sched_list
1126  *
1127  * Returns a pointer to the sched with the least load, or NULL if none of the
1128  * drm_gpu_schedulers are ready.
1129  */
1130 struct drm_gpu_scheduler *
1131 drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
1132                      unsigned int num_sched_list)
1133 {
1134         struct drm_gpu_scheduler *sched, *picked_sched = NULL;
1135         int i;
1136         unsigned int min_score = UINT_MAX, num_score;
1137
1138         for (i = 0; i < num_sched_list; ++i) {
1139                 sched = sched_list[i];
1140
1141                 if (!sched->ready) {
1142                         DRM_WARN("scheduler %s is not ready, skipping",
1143                                  sched->name);
1144                         continue;
1145                 }
1146
1147                 num_score = atomic_read(sched->score);
1148                 if (num_score < min_score) {
1149                         min_score = num_score;
1150                         picked_sched = sched;
1151                 }
1152         }
1153
1154         return picked_sched;
1155 }
1156 EXPORT_SYMBOL(drm_sched_pick_best);
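/*
 * One possible use of drm_sched_pick_best() is simple load balancing when a
 * context is created; a hedged sketch, where my_scheds[] holds one scheduler
 * per hardware ring of the same type:
 *
 *	struct drm_gpu_scheduler *best;
 *
 *	best = drm_sched_pick_best(my_scheds, ARRAY_SIZE(my_scheds));
 *	if (!best)
 *		return -ENODEV;		// no scheduler is ready
 */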
1157
1158 /**
1159  * drm_sched_free_job_work - worker to call free_job
1160  *
1161  * @w: free job work
1162  */
1163 static void drm_sched_free_job_work(struct work_struct *w)
1164 {
1165         struct drm_gpu_scheduler *sched =
1166                 container_of(w, struct drm_gpu_scheduler, work_free_job);
1167         struct drm_sched_job *job;
1168
1169         if (READ_ONCE(sched->pause_submit))
1170                 return;
1171
1172         job = drm_sched_get_finished_job(sched);
1173         if (job)
1174                 sched->ops->free_job(job);
1175
1176         drm_sched_run_free_queue(sched);
1177         drm_sched_run_job_queue(sched);
1178 }
1179
1180 /**
1181  * drm_sched_run_job_work - worker to call run_job
1182  *
1183  * @w: run job work
1184  */
1185 static void drm_sched_run_job_work(struct work_struct *w)
1186 {
1187         struct drm_gpu_scheduler *sched =
1188                 container_of(w, struct drm_gpu_scheduler, work_run_job);
1189         struct drm_sched_entity *entity;
1190         struct dma_fence *fence;
1191         struct drm_sched_fence *s_fence;
1192         struct drm_sched_job *sched_job;
1193         int r;
1194
1195         if (READ_ONCE(sched->pause_submit))
1196                 return;
1197
1198         /* Find entity with a ready job */
1199         entity = drm_sched_select_entity(sched);
1200         if (!entity)
1201                 return; /* No more work */
1202
1203         sched_job = drm_sched_entity_pop_job(entity);
1204         if (!sched_job) {
1205                 complete_all(&entity->entity_idle);
1206                 drm_sched_run_job_queue(sched);
1207                 return;
1208         }
1209
1210         s_fence = sched_job->s_fence;
1211
1212         atomic_add(sched_job->credits, &sched->credit_count);
1213         drm_sched_job_begin(sched_job);
1214
1215         trace_drm_run_job(sched_job, entity);
1216         fence = sched->ops->run_job(sched_job);
1217         complete_all(&entity->entity_idle);
1218         drm_sched_fence_scheduled(s_fence, fence);
1219
1220         if (!IS_ERR_OR_NULL(fence)) {
1221                 /* Drop for original kref_init of the fence */
1222                 dma_fence_put(fence);
1223
1224                 r = dma_fence_add_callback(fence, &sched_job->cb,
1225                                            drm_sched_job_done_cb);
1226                 if (r == -ENOENT)
1227                         drm_sched_job_done(sched_job, fence->error);
1228                 else if (r)
1229                         DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
1230         } else {
1231                 drm_sched_job_done(sched_job, IS_ERR(fence) ?
1232                                    PTR_ERR(fence) : 0);
1233         }
1234
1235         wake_up(&sched->job_scheduled);
1236         drm_sched_run_job_queue(sched);
1237 }
1238
1239 /**
1240  * drm_sched_init - Init a gpu scheduler instance
1241  *
1242  * @sched: scheduler instance
1243  * @ops: backend operations for this scheduler
1244  * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
1245  *             allocated and used
1246  * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
1247  * @credit_limit: the number of credits this scheduler can hold from all jobs
1248  * @hang_limit: number of times to allow a job to hang before dropping it
1249  * @timeout: timeout value in jiffies for the scheduler
1250  * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
1251  *              used
1252  * @score: optional score atomic shared with other schedulers
1253  * @name: name used for debugging
1254  * @dev: target &struct device
1255  *
1256  * Return 0 on success, otherwise error code.
1257  */
1258 int drm_sched_init(struct drm_gpu_scheduler *sched,
1259                    const struct drm_sched_backend_ops *ops,
1260                    struct workqueue_struct *submit_wq,
1261                    u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
1262                    long timeout, struct workqueue_struct *timeout_wq,
1263                    atomic_t *score, const char *name, struct device *dev)
1264 {
1265         int i;
1266
1267         sched->ops = ops;
1268         sched->credit_limit = credit_limit;
1269         sched->name = name;
1270         sched->timeout = timeout;
1271         sched->timeout_wq = timeout_wq ? : system_wq;
1272         sched->hang_limit = hang_limit;
1273         sched->score = score ? score : &sched->_score;
1274         sched->dev = dev;
1275
1276         if (num_rqs > DRM_SCHED_PRIORITY_COUNT) {
1277                 /* This is a gross violation--tell drivers what the problem is.
1278                  */
1279                 drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
1280                         __func__);
1281                 return -EINVAL;
1282         } else if (sched->sched_rq) {
1283                 /* Not an error, but warn anyway so drivers can
1284                  * fine-tune their DRM calling order, and return that
1285                  * all is good.
1286                  */
1287                 drm_warn(sched, "%s: scheduler already initialized!\n", __func__);
1288                 return 0;
1289         }
1290
1291         if (submit_wq) {
1292                 sched->submit_wq = submit_wq;
1293                 sched->own_submit_wq = false;
1294         } else {
1295 #ifdef CONFIG_LOCKDEP
1296                 sched->submit_wq = alloc_ordered_workqueue_lockdep_map(name,
1297                                                                        WQ_MEM_RECLAIM,
1298                                                                        &drm_sched_lockdep_map);
1299 #else
1300                 sched->submit_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
1301 #endif
1302                 if (!sched->submit_wq)
1303                         return -ENOMEM;
1304
1305                 sched->own_submit_wq = true;
1306         }
1307
1308         sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
1309                                         GFP_KERNEL | __GFP_ZERO);
1310         if (!sched->sched_rq)
1311                 goto Out_check_own;
1312         sched->num_rqs = num_rqs;
1313         for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
1314                 sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
1315                 if (!sched->sched_rq[i])
1316                         goto Out_unroll;
1317                 drm_sched_rq_init(sched, sched->sched_rq[i]);
1318         }
1319
1320         init_waitqueue_head(&sched->job_scheduled);
1321         INIT_LIST_HEAD(&sched->pending_list);
1322         spin_lock_init(&sched->job_list_lock);
1323         atomic_set(&sched->credit_count, 0);
1324         INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
1325         INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
1326         INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
1327         atomic_set(&sched->_score, 0);
1328         atomic64_set(&sched->job_id_count, 0);
1329         sched->pause_submit = false;
1330
1331         sched->ready = true;
1332         return 0;
1333 Out_unroll:
1334         for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--)
1335                 kfree(sched->sched_rq[i]);
1336
1337         kfree(sched->sched_rq);
1338         sched->sched_rq = NULL;
1339 Out_check_own:
1340         if (sched->own_submit_wq)
1341                 destroy_workqueue(sched->submit_wq);
1342         drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
1343         return -ENOMEM;
1344 }
1345 EXPORT_SYMBOL(drm_sched_init);
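/*
 * A hedged example of bringing up one scheduler per hardware ring with the
 * parameters documented above; my_sched_ops, MY_RING_SLOTS, my_tdr_wq, ring
 * and dev are driver-specific placeholders:
 *
 *	ret = drm_sched_init(&ring->sched, &my_sched_ops,
 *			     NULL,			// allocate an ordered submit wq
 *			     DRM_SCHED_PRIORITY_COUNT,	// one rq per priority
 *			     MY_RING_SLOTS,		// credit limit
 *			     3,				// hang limit
 *			     msecs_to_jiffies(500),	// job timeout
 *			     my_tdr_wq,			// or NULL for system_wq
 *			     NULL,			// no shared score
 *			     "my-ring", dev);
 */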
1346
1347 /**
1348  * drm_sched_fini - Destroy a gpu scheduler
1349  *
1350  * @sched: scheduler instance
1351  *
1352  * Tears down and cleans up the scheduler.
1353  *
1354  * This stops submission of new jobs to the hardware through
1355  * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job()
1356  * will not be called for jobs that are still in drm_gpu_scheduler.pending_list.
1357  * There is no solution for this currently. Thus, it is up to the driver to make
1358  * sure that:
1359  *
1360  *  a) drm_sched_fini() is only called after drm_sched_backend_ops.free_job()
1361  *     has been called for all submitted jobs, or that
1362  *  b) the jobs for which drm_sched_backend_ops.free_job() has not been called
1363  *     after drm_sched_fini() ran are freed manually.
1364  *
1365  * FIXME: Take care of the above problem and prevent this function from leaking
1366  * the jobs in drm_gpu_scheduler.pending_list under any circumstances.
1367  */
1368 void drm_sched_fini(struct drm_gpu_scheduler *sched)
1369 {
1370         struct drm_sched_entity *s_entity;
1371         int i;
1372
1373         drm_sched_wqueue_stop(sched);
1374
1375         for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
1376                 struct drm_sched_rq *rq = sched->sched_rq[i];
1377
1378                 spin_lock(&rq->lock);
1379                 list_for_each_entry(s_entity, &rq->entities, list)
1380                         /*
1381                          * Prevents reinsertion and marks job_queue as idle,
1382                          * it will be removed from the rq in drm_sched_entity_fini()
1383                          * eventually
1384                          */
1385                         s_entity->stopped = true;
1386                 spin_unlock(&rq->lock);
1387                 kfree(sched->sched_rq[i]);
1388         }
1389
1390         /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
1391         wake_up_all(&sched->job_scheduled);
1392
1393         /* Confirm no work left behind accessing device structures */
1394         cancel_delayed_work_sync(&sched->work_tdr);
1395
1396         if (sched->own_submit_wq)
1397                 destroy_workqueue(sched->submit_wq);
1398         sched->ready = false;
1399         kfree(sched->sched_rq);
1400         sched->sched_rq = NULL;
1401 }
1402 EXPORT_SYMBOL(drm_sched_fini);
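/*
 * Given the free_job() caveat above, a teardown path typically makes sure the
 * pending list has drained before tearing the scheduler down; a hedged
 * sketch, where my_wait_for_idle() stands in for the driver's own mechanism
 * and drm_sched_entity_destroy() lives in sched_entity.c:
 *
 *	my_wait_for_idle(ring);			 // all free_job() callbacks ran
 *	drm_sched_entity_destroy(&ctx->entity);  // tear down entities first ...
 *	drm_sched_fini(&ring->sched);		 // ... then the scheduler
 */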
1403
1404 /**
1405  * drm_sched_increase_karma - Update sched_entity guilty flag
1406  *
1407  * @bad: The job guilty of time out
1408  *
1409  * Increment on every hang caused by the 'bad' job. If this exceeds the hang
1410  * limit of the scheduler then the respective sched entity is marked guilty and
1411  * jobs from it will not be scheduled further
1412  */
1413 void drm_sched_increase_karma(struct drm_sched_job *bad)
1414 {
1415         int i;
1416         struct drm_sched_entity *tmp;
1417         struct drm_sched_entity *entity;
1418         struct drm_gpu_scheduler *sched = bad->sched;
1419
1420         /* don't change @bad's karma if it's from KERNEL RQ,
1421          * because sometimes a GPU hang can corrupt kernel jobs (like VM updating
1422          * jobs), but keep in mind that kernel jobs are always considered good.
1423          */
1424         if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
1425                 atomic_inc(&bad->karma);
1426
1427                 for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) {
1428                         struct drm_sched_rq *rq = sched->sched_rq[i];
1429
1430                         spin_lock(&rq->lock);
1431                         list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
1432                                 if (bad->s_fence->scheduled.context ==
1433                                     entity->fence_context) {
1434                                         if (entity->guilty)
1435                                                 atomic_set(entity->guilty, 1);
1436                                         break;
1437                                 }
1438                         }
1439                         spin_unlock(&rq->lock);
1440                         if (&entity->list != &rq->entities)
1441                                 break;
1442                 }
1443         }
1444 }
1445 EXPORT_SYMBOL(drm_sched_increase_karma);
1446
1447 /**
1448  * drm_sched_wqueue_ready - Is the scheduler ready for submission
1449  *
1450  * @sched: scheduler instance
1451  *
1452  * Returns true if submission is ready
1453  */
1454 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
1455 {
1456         return sched->ready;
1457 }
1458 EXPORT_SYMBOL(drm_sched_wqueue_ready);
1459
1460 /**
1461  * drm_sched_wqueue_stop - stop scheduler submission
1462  * @sched: scheduler instance
1463  *
1464  * Stops the scheduler from pulling new jobs from entities. It also stops
1465  * freeing jobs automatically through drm_sched_backend_ops.free_job().
1466  */
1467 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
1468 {
1469         WRITE_ONCE(sched->pause_submit, true);
1470         cancel_work_sync(&sched->work_run_job);
1471         cancel_work_sync(&sched->work_free_job);
1472 }
1473 EXPORT_SYMBOL(drm_sched_wqueue_stop);
1474
1475 /**
1476  * drm_sched_wqueue_start - start scheduler submission
1477  * @sched: scheduler instance
1478  *
1479  * Restarts the scheduler after drm_sched_wqueue_stop() has stopped it.
1480  *
1481  * This function is not necessary for 'conventional' startup. The scheduler is
1482  * fully operational after drm_sched_init() succeeded.
1483  */
1484 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
1485 {
1486         WRITE_ONCE(sched->pause_submit, false);
1487         queue_work(sched->submit_wq, &sched->work_run_job);
1488         queue_work(sched->submit_wq, &sched->work_free_job);
1489 }
1490 EXPORT_SYMBOL(drm_sched_wqueue_start);
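/*
 * Outside of reset recovery, the stop/start pair above can be used to freeze
 * the scheduler while its state is inspected; a purely illustrative sketch,
 * my_snapshot_ring() being a hypothetical capture helper:
 *
 *	drm_sched_wqueue_stop(sched);
 *	my_snapshot_ring(sched);	// e.g. for a device coredump
 *	drm_sched_wqueue_start(sched);
 */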