1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
4 #include <drm/drm_drv.h>
5 #include <drm/drm_exec.h>
6 #include <drm/drm_gem_shmem_helper.h>
7 #include <drm/drm_managed.h>
8 #include <drm/gpu_scheduler.h>
9 #include <drm/panthor_drm.h>
11 #include <linux/build_bug.h>
12 #include <linux/clk.h>
13 #include <linux/delay.h>
14 #include <linux/dma-mapping.h>
15 #include <linux/dma-resv.h>
16 #include <linux/firmware.h>
17 #include <linux/interrupt.h>
19 #include <linux/iopoll.h>
20 #include <linux/iosys-map.h>
21 #include <linux/module.h>
22 #include <linux/platform_device.h>
23 #include <linux/pm_runtime.h>
25 #include "panthor_devfreq.h"
26 #include "panthor_device.h"
27 #include "panthor_fw.h"
28 #include "panthor_gem.h"
29 #include "panthor_gpu.h"
30 #include "panthor_heap.h"
31 #include "panthor_mmu.h"
32 #include "panthor_regs.h"
33 #include "panthor_sched.h"
38 * Mali CSF hardware adopts a firmware-assisted scheduling model, where
39 * the firmware takes care of scheduling aspects, to some extent.
41 * Scheduling happens at the scheduling group level: each group
42 * contains 1 to N queues (N is FW/hardware dependent, and exposed
43 * through the firmware interface). Each queue is assigned a command
44 * stream ring buffer, which serves as a way to get jobs submitted to
45 * the GPU, among other things.
47 * The firmware can schedule a maximum of M groups (M is FW/hardware
48 * dependent, and exposed through the firmware interface). Beyond
49 * this maximum number of groups, the kernel must take care of
50 * rotating the groups passed to the firmware so every group gets
51 * a chance to have its queues scheduled for execution.
53 * The current implementation only supports kernel-mode queues.
54 * In other words, userspace doesn't have access to the ring-buffer.
55 * Instead, userspace passes indirect command stream buffers that are
56 * called from the queue ring-buffer by the kernel using a pre-defined
57 * sequence of command stream instructions to ensure the userspace driver
58 * always gets consistent results (cache maintenance,
59 * synchronization, ...).
61 * We rely on the drm_gpu_scheduler framework to deal with job
62 * dependencies and submission. Like any other driver dealing with a
63 * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each
64 * entity has its own job scheduler. When a job is ready to be executed
65 * (all its dependencies are met), it is pushed to the appropriate
66 * queue ring-buffer, and the group is scheduled for execution if it
67 * wasn't already active.
69 * Kernel-side group scheduling is timeslice-based. When there are fewer
70 * groups than slots, the periodic tick is disabled and we
71 * just let the FW schedule the active groups. When there are more
72 * groups than slots, we give each group a chance to execute work for
73 * a given amount of time, and then re-evaluate and pick new groups
74 * to schedule. The group selection algorithm is based on
75 * priority+round-robin.
77 * Even though user-mode queues are out of scope right now, the
78 * current design takes them into account by avoiding any guess about the
79 * group/queue state that would be based on information we wouldn't have
80 * if userspace was in charge of the ring-buffer. That's also one of the
81 * reasons we don't do 'cooperative' scheduling (encoding FW group slot
82 * reservation as dma_fence that would be returned from the
83 * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
84 * a queue of waiters, ordered by job submission order). This approach
85 * would work for kernel-mode queues, but would make user-mode queues a
86 * lot more complicated to retrofit.
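 *
 * As a rough illustration (a simplified sketch, not the exact code; the real
 * logic lives in tick_ctx_pick_groups_from_list() and tick_ctx_apply() below),
 * a full tick picks groups per priority, highest first, considering non-idle
 * runnable groups before idle ones, until all FW group slots are accounted
 * for:
 *
 *	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--)
 *		tick_ctx_pick_groups_from_list(sched, &ctx,
 *					       &sched->groups.runnable[prio],
 *					       true, false);
 *
 * and then suspends/terminates the evicted groups and binds the newly picked
 * ones to the freed slots.
 */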
89 #define JOB_TIMEOUT_MS 5000
91 #define MIN_CS_PER_CSG 8
94 #define MAX_CSG_PRIO 0xf
99 * struct panthor_csg_slot - Command stream group slot
101 * This represents a FW slot for a scheduling group.
103 struct panthor_csg_slot {
104 /** @group: Scheduling group bound to this slot. */
105 struct panthor_group *group;
107 /** @priority: Group priority. */
111 * @idle: True if the group bound to this slot is idle.
113 * A group is idle when it has nothing waiting for execution on
114 * all its queues, or when queues are blocked waiting for something
115 * to happen (synchronization object).
121 * enum panthor_csg_priority - Group priority
123 enum panthor_csg_priority {
124 /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */
125 PANTHOR_CSG_PRIORITY_LOW = 0,
127 /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
128 PANTHOR_CSG_PRIORITY_MEDIUM,
130 /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
131 PANTHOR_CSG_PRIORITY_HIGH,
134 * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group.
136 * Real-time priority allows one to preempt scheduling of other
137 * non-real-time groups. When such a group becomes executable,
138 * it will evict the group with the lowest non-rt priority if
139 * there's no free group slot available.
141 * Currently not exposed to userspace.
143 PANTHOR_CSG_PRIORITY_RT,
145 /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
146 PANTHOR_CSG_PRIORITY_COUNT,
150 * struct panthor_scheduler - Object used to manage the scheduler
152 struct panthor_scheduler {
153 /** @ptdev: Device. */
154 struct panthor_device *ptdev;
157 * @wq: Workqueue used by our internal scheduler logic and
160 * Used for the scheduler tick, group updates or other kinds of FW
161 * event processing that can't be handled in the threaded interrupt
162 * path. Also passed to the drm_gpu_scheduler instances embedded
165 struct workqueue_struct *wq;
168 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
170 * We have a workqueue dedicated to heap chunk allocation work items to avoid
171 * blocking the rest of the scheduler if the allocation tries to reclaim memory.
174 struct workqueue_struct *heap_alloc_wq;
176 /** @tick_work: Work executed on a scheduling tick. */
177 struct delayed_work tick_work;
180 * @sync_upd_work: Work used to process synchronization object updates.
182 * We use this work to unblock queues/groups that were waiting on a
183 * synchronization object.
185 struct work_struct sync_upd_work;
188 * @fw_events_work: Work used to process FW events outside the interrupt path.
190 * Even if the interrupt is threaded, we need any event processing
191 * that requires taking the panthor_scheduler::lock to be processed
192 * outside the interrupt path so we don't block the tick logic when
193 * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
194 * event processing requires taking this lock, we just delegate all
195 * FW event processing to the scheduler workqueue.
197 struct work_struct fw_events_work;
200 * @fw_events: Bitmask encoding pending FW events.
205 * @resched_target: When the next tick should occur.
207 * Expressed in jiffies.
212 * @last_tick: When the last tick occurred.
214 * Expressed in jiffies.
218 /** @tick_period: Tick period in jiffies. */
222 * @lock: Lock protecting access to all the scheduler fields.
224 * Should be taken in the tick work, the irq handler, and anywhere the @groups
225 * fields are touched.
229 /** @groups: Various lists used to classify groups. */
232 * @runnable: Runnable group lists.
234 * When a group has queues that want to execute something,
235 * its panthor_group::run_node should be inserted here.
237 * One list per-priority.
239 struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
242 * @idle: Idle group lists.
244 * When all queues of a group are idle (either because they
245 * have nothing to execute, or because they are blocked), the
246 * panthor_group::run_node field should be inserted here.
248 * One list per-priority.
250 struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
253 * @waiting: List of groups whose queues are blocked on a
254 * synchronization object.
256 * Insert panthor_group::wait_node here when a group is waiting
257 * for synchronization objects to be signaled.
259 * This list is evaluated in the @sync_upd_work work.
261 struct list_head waiting;
265 * @csg_slots: FW command stream group slots.
267 struct panthor_csg_slot csg_slots[MAX_CSGS];
269 /** @csg_slot_count: Number of command stream group slots exposed by the FW. */
272 /** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
275 /** @as_slot_count: Number of address space slots supported by the MMU. */
278 /** @used_csg_slot_count: Number of command stream group slots currently used. */
279 u32 used_csg_slot_count;
281 /** @sb_slot_count: Number of scoreboard slots. */
285 * @might_have_idle_groups: True if an active group might have become idle.
287 * This will force a tick, so other runnable groups can be scheduled if one
288 * or more active groups became idle.
290 bool might_have_idle_groups;
292 /** @pm: Power management related fields. */
294 /** @has_ref: True if the scheduler owns a runtime PM reference. */
298 /** @reset: Reset related fields. */
300 /** @lock: Lock protecting the other reset fields. */
304 * @in_progress: True if a reset is in progress.
306 * Set to true in panthor_sched_pre_reset() and back to false in
307 * panthor_sched_post_reset().
309 atomic_t in_progress;
312 * @stopped_groups: List containing all groups that were stopped during a reset.
315 * Insert panthor_group::run_node in the pre_reset path.
317 struct list_head stopped_groups;
322 * struct panthor_syncobj_32b - 32-bit FW synchronization object
324 struct panthor_syncobj_32b {
325 /** @seqno: Sequence number. */
331 * Not zero on failure.
337 * struct panthor_syncobj_64b - 64-bit FW synchronization object
339 struct panthor_syncobj_64b {
340 /** @seqno: Sequence number. */
346 * Not zero on failure.
355 * struct panthor_queue - Execution queue
357 struct panthor_queue {
358 /** @scheduler: DRM scheduler used for this queue. */
359 struct drm_gpu_scheduler scheduler;
361 /** @entity: DRM scheduling entity used for this queue. */
362 struct drm_sched_entity entity;
365 * @remaining_time: Time remaining before the job timeout expires.
367 * The job timeout is suspended when the queue is not scheduled by the
368 * FW. Every time we suspend the timer, we need to save the remaining
369 * time so we can restore it later on.
371 unsigned long remaining_time;
373 /** @timeout_suspended: True if the job timeout was suspended. */
374 bool timeout_suspended;
377 * @doorbell_id: Doorbell assigned to this queue.
379 * Right now, all groups share the same doorbell, and the doorbell ID
380 * is assigned to group_slot + 1 when the group is assigned a slot. But
381 * we might decide to provide fine grained doorbell assignment at some
382 * point, so we don't have to wake up all queues in a group every time one
383 * of them is updated.
388 * @priority: Priority of the queue inside the group.
390 * Must be less than 16 (Only 4 bits available).
393 #define CSF_MAX_QUEUE_PRIO GENMASK(3, 0)
395 /** @ringbuf: Command stream ring-buffer. */
396 struct panthor_kernel_bo *ringbuf;
398 /** @iface: Firmware interface. */
400 /** @mem: FW memory allocated for this interface. */
401 struct panthor_kernel_bo *mem;
403 /** @input: Input interface. */
404 struct panthor_fw_ringbuf_input_iface *input;
406 /** @output: Output interface. */
407 const struct panthor_fw_ringbuf_output_iface *output;
409 /** @input_fw_va: FW virtual address of the input interface buffer. */
412 /** @output_fw_va: FW virtual address of the output interface buffer. */
417 * @syncwait: Stores information about the synchronization object this
418 * queue is waiting on.
421 /** @gpu_va: GPU address of the synchronization object. */
424 /** @ref: Reference value to compare against. */
427 /** @gt: True if this is a greater-than test. */
430 /** @sync64: True if this is a 64-bit sync object. */
433 /** @obj: Buffer object holding the synchronization object. */
434 struct drm_gem_object *obj;
436 /** @offset: Offset of the synchronization object inside @obj. */
440 * @kmap: Kernel mapping of the buffer object holding the
441 * synchronization object.
446 /** @fence_ctx: Fence context fields. */
448 /** @lock: Used to protect access to all fences allocated by this context. */
452 * @id: Fence context ID.
454 * Allocated with dma_fence_context_alloc().
458 /** @seqno: Sequence number of the last initialized fence. */
462 * @in_flight_jobs: List containing all in-flight jobs.
464 * Used to keep track and signal panthor_job::done_fence when the
465 * synchronization object attached to the queue is signaled.
467 struct list_head in_flight_jobs;
472 * enum panthor_group_state - Scheduling group state.
474 enum panthor_group_state {
475 /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
476 PANTHOR_CS_GROUP_CREATED,
478 /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
479 PANTHOR_CS_GROUP_ACTIVE,
482 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
483 * inactive/suspended right now.
485 PANTHOR_CS_GROUP_SUSPENDED,
488 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
490 * Can no longer be scheduled. The only allowed action is a destruction.
492 PANTHOR_CS_GROUP_TERMINATED,
495 * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
497 * The FW returned an inconsistent state. The group is flagged unusable
498 * and can no longer be scheduled. The only allowed action is a destruction.
501 * When that happens, we also schedule a FW reset, to start from a fresh state.
504 PANTHOR_CS_GROUP_UNKNOWN_STATE,
508 * struct panthor_group - Scheduling group object
510 struct panthor_group {
511 /** @refcount: Reference count */
512 struct kref refcount;
514 /** @ptdev: Device. */
515 struct panthor_device *ptdev;
517 /** @vm: VM bound to the group. */
518 struct panthor_vm *vm;
520 /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
521 u64 compute_core_mask;
523 /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
524 u64 fragment_core_mask;
526 /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
529 /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
530 u8 max_compute_cores;
532 /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
533 u8 max_fragment_cores;
535 /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
538 /** @priority: Group priority (check panthor_csg_priority). */
541 /** @blocked_queues: Bitmask reflecting the blocked queues. */
544 /** @idle_queues: Bitmask reflecting the idle queues. */
547 /** @fatal_lock: Lock used to protect access to fatal fields. */
548 spinlock_t fatal_lock;
550 /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
553 /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
556 /** @queue_count: Number of queues in this group. */
559 /** @queues: Queues owned by this group. */
560 struct panthor_queue *queues[MAX_CS_PER_CSG];
563 * @csg_id: ID of the FW group slot.
565 * -1 when the group is not scheduled/active.
570 * @destroyed: True when the group has been destroyed.
572 * If a group is destroyed it becomes useless: no further jobs can be submitted
573 * to its queues. We simply wait for all references to be dropped so we can
574 * release the group object.
579 * @timedout: True when a timeout occurred on any of the queues owned by this group.
582 * Timeouts can be reported by drm_sched or by the FW. In any case, any
583 * timeout situation is unrecoverable, and the group becomes useless.
584 * We simply wait for all references to be dropped so we can release the group object.
590 * @syncobjs: Pool of per-queue synchronization objects.
592 * One sync object per queue. The position of the sync object is
593 * determined by the queue index.
595 struct panthor_kernel_bo *syncobjs;
597 /** @state: Group state. */
598 enum panthor_group_state state;
601 * @suspend_buf: Suspend buffer.
603 * Stores the state of the group and its queues when a group is suspended.
604 * Used at resume time to restore the group in its previous state.
606 * The size of the suspend buffer is exposed through the FW interface.
608 struct panthor_kernel_bo *suspend_buf;
611 * @protm_suspend_buf: Protection mode suspend buffer.
613 * Stores the state of the group and its queues when a group that's in
614 * protection mode is suspended.
616 * Used at resume time to restore the group in its previous state.
618 * The size of the protection mode suspend buffer is exposed through the
621 struct panthor_kernel_bo *protm_suspend_buf;
623 /** @sync_upd_work: Work used to check/signal job fences. */
624 struct work_struct sync_upd_work;
626 /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
627 struct work_struct tiler_oom_work;
629 /** @term_work: Work used to finish the group termination procedure. */
630 struct work_struct term_work;
633 * @release_work: Work used to release group resources.
635 * We need to postpone the group release to avoid a deadlock when
636 * the last ref is released in the tick work.
638 struct work_struct release_work;
641 * @run_node: Node used to insert the group in the
642 * panthor_scheduler::groups::{runnable,idle} and
643 * panthor_scheduler::reset::stopped_groups lists.
645 struct list_head run_node;
648 * @wait_node: Node used to insert the group in the
649 * panthor_scheduler::groups::waiting list.
651 struct list_head wait_node;
655 * group_queue_work() - Queue a group work
656 * @group: Group to queue the work for.
659 * Grabs a ref and queues a work item to the scheduler workqueue. If
660 * the work was already queued, we release the reference we grabbed.
662 * Work callbacks must release the reference we grabbed here.
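 *
 * Example (illustrative, mirroring callers later in this file):
 *
 *	group_queue_work(group, term);	// queues group->term_work
 *
 * where the term_work handler must drop the reference with group_put() once
 * it is done.
 */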
664 #define group_queue_work(group, wname) \
667 if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
672 * sched_queue_work() - Queue a scheduler work.
673 * @sched: Scheduler object.
676 * Conditionally queues a scheduler work if no reset is pending/in-progress.
678 #define sched_queue_work(sched, wname) \
680 if (!atomic_read(&(sched)->reset.in_progress) && \
681 !panthor_device_reset_is_pending((sched)->ptdev)) \
682 queue_work((sched)->wq, &(sched)->wname ## _work); \
686 * sched_queue_delayed_work() - Queue a scheduler delayed work.
687 * @sched: Scheduler object.
689 * @delay: Work delay in jiffies.
691 * Conditionally queues a scheduler delayed work if no reset is
692 * pending/in-progress.
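 *
 * Example (illustrative): sched_queue_delayed_work(sched, tick, 0) requests
 * an immediate scheduler tick, as done when an IDLE or FATAL FW event is
 * received.
 */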
694 #define sched_queue_delayed_work(sched, wname, delay) \
696 if (!atomic_read(&sched->reset.in_progress) && \
697 !panthor_device_reset_is_pending((sched)->ptdev)) \
698 mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
702 * We currently set the maximum number of groups per file to an arbitrarily low value,
703 * but this can be updated if we need more.
705 #define MAX_GROUPS_PER_POOL 128
708 * struct panthor_group_pool - Group pool
710 * Each file gets assigned a group pool.
712 struct panthor_group_pool {
713 /** @xa: Xarray used to manage group handles. */
718 * struct panthor_job - Used to manage a GPU job
721 /** @base: Inherit from drm_sched_job. */
722 struct drm_sched_job base;
724 /** @refcount: Reference count. */
725 struct kref refcount;
727 /** @group: Group of the queue this job will be pushed to. */
728 struct panthor_group *group;
730 /** @queue_idx: Index of the queue inside @group. */
733 /** @call_info: Information about the userspace command stream call. */
735 /** @start: GPU address of the userspace command stream. */
738 /** @size: Size of the userspace command stream. */
742 * @latest_flush: Flush ID at the time the userspace command stream was built.
745 * Needed for the flush reduction mechanism.
750 /** @ringbuf: Position of this job in the ring buffer. */
752 /** @start: Start offset. */
755 /** @end: End offset. */
760 * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs
763 struct list_head node;
765 /** @done_fence: Fence signaled when the job is finished or cancelled. */
766 struct dma_fence *done_fence;
770 panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
772 if (queue->syncwait.kmap) {
773 struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
775 drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
776 queue->syncwait.kmap = NULL;
779 drm_gem_object_put(queue->syncwait.obj);
780 queue->syncwait.obj = NULL;
784 panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
786 struct panthor_device *ptdev = group->ptdev;
787 struct panthor_gem_object *bo;
788 struct iosys_map map;
791 if (queue->syncwait.kmap)
792 return queue->syncwait.kmap + queue->syncwait.offset;
794 bo = panthor_vm_get_bo_for_va(group->vm,
795 queue->syncwait.gpu_va,
796 &queue->syncwait.offset);
797 if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
798 goto err_put_syncwait_obj;
800 queue->syncwait.obj = &bo->base.base;
801 ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
802 if (drm_WARN_ON(&ptdev->base, ret))
803 goto err_put_syncwait_obj;
805 queue->syncwait.kmap = map.vaddr;
806 if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
807 goto err_put_syncwait_obj;
809 return queue->syncwait.kmap + queue->syncwait.offset;
811 err_put_syncwait_obj:
812 panthor_queue_put_syncwait_obj(queue);
816 static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
818 if (IS_ERR_OR_NULL(queue))
821 if (queue->entity.fence_context)
822 drm_sched_entity_destroy(&queue->entity);
824 if (queue->scheduler.ops)
825 drm_sched_fini(&queue->scheduler);
827 panthor_queue_put_syncwait_obj(queue);
829 panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
830 panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
835 static void group_release_work(struct work_struct *work)
837 struct panthor_group *group = container_of(work,
838 struct panthor_group,
840 struct panthor_device *ptdev = group->ptdev;
843 for (i = 0; i < group->queue_count; i++)
844 group_free_queue(group, group->queues[i]);
846 panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
847 panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
848 panthor_kernel_bo_destroy(group->vm, group->syncobjs);
850 panthor_vm_put(group->vm);
854 static void group_release(struct kref *kref)
856 struct panthor_group *group = container_of(kref,
857 struct panthor_group,
859 struct panthor_device *ptdev = group->ptdev;
861 drm_WARN_ON(&ptdev->base, group->csg_id >= 0);
862 drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node));
863 drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node));
865 queue_work(panthor_cleanup_wq, &group->release_work);
868 static void group_put(struct panthor_group *group)
871 kref_put(&group->refcount, group_release);
874 static struct panthor_group *
875 group_get(struct panthor_group *group)
878 kref_get(&group->refcount);
884 * group_bind_locked() - Bind a group to a group slot
888 * Return: 0 on success, a negative error code otherwise.
891 group_bind_locked(struct panthor_group *group, u32 csg_id)
893 struct panthor_device *ptdev = group->ptdev;
894 struct panthor_csg_slot *csg_slot;
897 lockdep_assert_held(&ptdev->scheduler->lock);
899 if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
900 ptdev->scheduler->csg_slots[csg_id].group))
903 ret = panthor_vm_active(group->vm);
907 csg_slot = &ptdev->scheduler->csg_slots[csg_id];
909 group->csg_id = csg_id;
911 /* Dummy doorbell allocation: doorbell is assigned to the group and
912 * all queues use the same doorbell.
914 * TODO: Implement LRU-based doorbell assignment, so the most often
915 * updated queues get their own doorbell, thus avoiding useless checks
916 * on queues belonging to the same group that are rarely updated.
918 for (u32 i = 0; i < group->queue_count; i++)
919 group->queues[i]->doorbell_id = csg_id + 1;
921 csg_slot->group = group;
927 * group_unbind_locked() - Unbind a group from a slot.
928 * @group: Group to unbind.
930 * Return: 0 on success, a negative error code otherwise.
933 group_unbind_locked(struct panthor_group *group)
935 struct panthor_device *ptdev = group->ptdev;
936 struct panthor_csg_slot *slot;
938 lockdep_assert_held(&ptdev->scheduler->lock);
940 if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
943 if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
946 slot = &ptdev->scheduler->csg_slots[group->csg_id];
947 panthor_vm_idle(group->vm);
950 /* Tiler OOM events will be re-issued next time the group is scheduled. */
951 atomic_set(&group->tiler_oom, 0);
952 cancel_work(&group->tiler_oom_work);
954 for (u32 i = 0; i < group->queue_count; i++)
955 group->queues[i]->doorbell_id = -1;
964 * cs_slot_prog_locked() - Program a queue slot
966 * @csg_id: Group slot ID.
967 * @cs_id: Queue slot ID.
969 * Program a queue slot with the queue information so things can start being
970 * executed on this queue.
972 * The group slot must have a group bound to it already (group_bind_locked()).
975 cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
977 struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
978 struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
980 lockdep_assert_held(&ptdev->scheduler->lock);
982 queue->iface.input->extract = queue->iface.output->extract;
983 drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
985 cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
986 cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
987 cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
988 cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
989 cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
990 CS_CONFIG_DOORBELL(queue->doorbell_id);
991 cs_iface->input->ack_irq_mask = ~0;
992 panthor_fw_update_reqs(cs_iface, req,
1001 if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
1002 drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
1003 queue->timeout_suspended = false;
1008 * cs_slot_reset_locked() - Reset a queue slot
1010 * @csg_id: Group slot.
1011 * @cs_id: Queue slot.
1013 * Change the queue slot state to STOP and suspend the queue timeout if
1014 * the queue is not blocked.
1016 * The group slot must have a group bound to it (group_bind_locked()).
1019 cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1021 struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1022 struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1023 struct panthor_queue *queue = group->queues[cs_id];
1025 lockdep_assert_held(&ptdev->scheduler->lock);
1027 panthor_fw_update_reqs(cs_iface, req,
1031 /* If the queue is blocked, we want to keep the timeout running, so
1032 * we can detect unbounded waits and kill the group when that happens.
1034 if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
1035 queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
1036 queue->timeout_suspended = true;
1037 WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
1044 * csg_slot_sync_priority_locked() - Synchronize the group slot priority
1046 * @csg_id: Group slot ID.
1048 * Group slot priority update happens asynchronously. When we receive a
1049 * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can
1050 * reflect it to our panthor_csg_slot object.
1053 csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
1055 struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1056 struct panthor_fw_csg_iface *csg_iface;
1058 lockdep_assert_held(&ptdev->scheduler->lock);
1060 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
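/* The CSG priority is stored in the upper bits of endpoint_req, hence the
 * mask + shift; see also MAX_CSG_PRIO.
 */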
1061 csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
1065 * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
1067 * @csg_id: Group slot.
1068 * @cs_id: Queue slot.
1070 * Queue state is updated on group suspend or STATUS_UPDATE event.
1073 cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1075 struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1076 struct panthor_queue *queue = group->queues[cs_id];
1077 struct panthor_fw_cs_iface *cs_iface =
1078 panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
1080 u32 status_wait_cond;
1082 switch (cs_iface->output->status_blocked_reason) {
1083 case CS_STATUS_BLOCKED_REASON_UNBLOCKED:
1084 if (queue->iface.input->insert == queue->iface.output->extract &&
1085 cs_iface->output->status_scoreboards == 0)
1086 group->idle_queues |= BIT(cs_id);
1089 case CS_STATUS_BLOCKED_REASON_SYNC_WAIT:
1090 if (list_empty(&group->wait_node)) {
1091 list_move_tail(&group->wait_node,
1092 &group->ptdev->scheduler->groups.waiting);
1094 group->blocked_queues |= BIT(cs_id);
1095 queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
1096 queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
1097 status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
1098 queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT;
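/* 64-bit sync objects expose their reference value as two 32-bit halves;
 * stitch the high part back into syncwait.ref.
 */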
1099 if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) {
1100 u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi;
1102 queue->syncwait.sync64 = true;
1103 queue->syncwait.ref |= sync_val_hi << 32;
1105 queue->syncwait.sync64 = false;
1110 /* Other reasons are not blocking. Consider the queue as runnable in those cases. */
1118 csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
1120 struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1121 struct panthor_group *group = csg_slot->group;
1124 lockdep_assert_held(&ptdev->scheduler->lock);
1126 group->idle_queues = 0;
1127 group->blocked_queues = 0;
1129 for (i = 0; i < group->queue_count; i++) {
1130 if (group->queues[i])
1131 cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
1136 csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
1138 struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1139 struct panthor_fw_csg_iface *csg_iface;
1140 struct panthor_group *group;
1141 enum panthor_group_state new_state, old_state;
1144 lockdep_assert_held(&ptdev->scheduler->lock);
1146 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1147 group = csg_slot->group;
1152 old_state = group->state;
1153 csg_state = csg_iface->output->ack & CSG_STATE_MASK;
1154 switch (csg_state) {
1155 case CSG_STATE_START:
1156 case CSG_STATE_RESUME:
1157 new_state = PANTHOR_CS_GROUP_ACTIVE;
1159 case CSG_STATE_TERMINATE:
1160 new_state = PANTHOR_CS_GROUP_TERMINATED;
1162 case CSG_STATE_SUSPEND:
1163 new_state = PANTHOR_CS_GROUP_SUSPENDED;
1166 /* The unknown state might be caused by a FW state corruption,
1167 * which means the group metadata can't be trusted anymore, and
1168 * the SUSPEND operation might propagate the corruption to the
1169 * suspend buffers. Flag the group state as unknown to make
1170 * sure it's unusable after that point.
1172 drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
1174 new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
1178 if (old_state == new_state)
1181 /* The unknown state might be caused by a FW issue; reset the FW to
1182 * start from a fresh state.
1184 if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
1185 panthor_device_schedule_reset(ptdev);
1187 if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
1188 csg_slot_sync_queues_state_locked(ptdev, csg_id);
1190 if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
1193 /* Reset the queue slots so we start from a clean
1194 * state when starting/resuming a new group on this
1195 * CSG slot. No wait needed here, and no doorbell ring
1196 * either, since the CS slot will only be re-used
1197 * on the next CSG start operation.
1199 for (i = 0; i < group->queue_count; i++) {
1200 if (group->queues[i])
1201 cs_slot_reset_locked(ptdev, csg_id, i);
1205 group->state = new_state;
1209 csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
1211 struct panthor_fw_csg_iface *csg_iface;
1212 struct panthor_csg_slot *csg_slot;
1213 struct panthor_group *group;
1214 u32 queue_mask = 0, i;
1216 lockdep_assert_held(&ptdev->scheduler->lock);
1218 if (priority > MAX_CSG_PRIO)
1221 if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
1224 csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1225 group = csg_slot->group;
1226 if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE)
1229 csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
1231 for (i = 0; i < group->queue_count; i++) {
1232 if (group->queues[i]) {
1233 cs_slot_prog_locked(ptdev, csg_id, i);
1234 queue_mask |= BIT(i);
1238 csg_iface->input->allow_compute = group->compute_core_mask;
1239 csg_iface->input->allow_fragment = group->fragment_core_mask;
1240 csg_iface->input->allow_other = group->tiler_core_mask;
1241 csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
1242 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
1243 CSG_EP_REQ_TILER(group->max_tiler_cores) |
1244 CSG_EP_REQ_PRIORITY(priority);
1245 csg_iface->input->config = panthor_vm_as(group->vm);
1247 if (group->suspend_buf)
1248 csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
1250 csg_iface->input->suspend_buf = 0;
1252 if (group->protm_suspend_buf) {
1253 csg_iface->input->protm_suspend_buf =
1254 panthor_kernel_bo_gpuva(group->protm_suspend_buf);
1256 csg_iface->input->protm_suspend_buf = 0;
1259 csg_iface->input->ack_irq_mask = ~0;
1260 panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask);
1265 cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
1266 u32 csg_id, u32 cs_id)
1268 struct panthor_scheduler *sched = ptdev->scheduler;
1269 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1270 struct panthor_group *group = csg_slot->group;
1271 struct panthor_fw_cs_iface *cs_iface;
1275 lockdep_assert_held(&sched->lock);
1277 cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1278 fatal = cs_iface->output->fatal;
1279 info = cs_iface->output->fatal_info;
1282 group->fatal_queues |= BIT(cs_id);
1284 sched_queue_delayed_work(sched, tick, 0);
1285 drm_warn(&ptdev->base,
1286 "CSG slot %d CS slot: %d\n"
1287 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
1288 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
1289 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
1291 (unsigned int)CS_EXCEPTION_TYPE(fatal),
1292 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
1293 (unsigned int)CS_EXCEPTION_DATA(fatal),
1298 cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
1299 u32 csg_id, u32 cs_id)
1301 struct panthor_scheduler *sched = ptdev->scheduler;
1302 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1303 struct panthor_group *group = csg_slot->group;
1304 struct panthor_queue *queue = group && cs_id < group->queue_count ?
1305 group->queues[cs_id] : NULL;
1306 struct panthor_fw_cs_iface *cs_iface;
1310 lockdep_assert_held(&sched->lock);
1312 cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1313 fault = cs_iface->output->fault;
1314 info = cs_iface->output->fault_info;
1316 if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
1317 u64 cs_extract = queue->iface.output->extract;
1318 struct panthor_job *job;
1320 spin_lock(&queue->fence_ctx.lock);
1321 list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) {
1322 if (cs_extract >= job->ringbuf.end)
1325 if (cs_extract < job->ringbuf.start)
1328 dma_fence_set_error(job->done_fence, -EINVAL);
1330 spin_unlock(&queue->fence_ctx.lock);
1333 drm_warn(&ptdev->base,
1334 "CSG slot %d CS slot: %d\n"
1335 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
1336 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
1337 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
1339 (unsigned int)CS_EXCEPTION_TYPE(fault),
1340 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
1341 (unsigned int)CS_EXCEPTION_DATA(fault),
1345 static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
1347 struct panthor_device *ptdev = group->ptdev;
1348 struct panthor_scheduler *sched = ptdev->scheduler;
1349 u32 renderpasses_in_flight, pending_frag_count;
1350 struct panthor_heap_pool *heaps = NULL;
1351 u64 heap_address, new_chunk_va = 0;
1352 u32 vt_start, vt_end, frag_end;
1355 mutex_lock(&sched->lock);
1356 csg_id = group->csg_id;
1358 struct panthor_fw_cs_iface *cs_iface;
1360 cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1361 heaps = panthor_vm_get_heap_pool(group->vm, false);
1362 heap_address = cs_iface->output->heap_address;
1363 vt_start = cs_iface->output->heap_vt_start;
1364 vt_end = cs_iface->output->heap_vt_end;
1365 frag_end = cs_iface->output->heap_frag_end;
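/* The heap_vt_{start,end} and heap_frag_end values reported by the FW are
 * counters; their differences give the number of render passes still in
 * flight and the number of fragment jobs not completed yet, which are fed
 * to panthor_heap_grow().
 */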
1366 renderpasses_in_flight = vt_start - frag_end;
1367 pending_frag_count = vt_end - frag_end;
1369 mutex_unlock(&sched->lock);
1371 /* The group got scheduled out; we stop here. We will get a new tiler OOM event
1372 * when it's scheduled again.
1374 if (unlikely(csg_id < 0))
1377 if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
1380 /* We do the allocation without holding the scheduler lock to avoid
1381 * blocking the scheduling.
1383 ret = panthor_heap_grow(heaps, heap_address,
1384 renderpasses_in_flight,
1385 pending_frag_count, &new_chunk_va);
1388 if (ret && ret != -EBUSY) {
1389 drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
1390 group->fatal_queues |= BIT(cs_id);
1391 sched_queue_delayed_work(sched, tick, 0);
1392 goto out_put_heap_pool;
1395 mutex_lock(&sched->lock);
1396 csg_id = group->csg_id;
1398 struct panthor_fw_csg_iface *csg_iface;
1399 struct panthor_fw_cs_iface *cs_iface;
1401 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1402 cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1404 cs_iface->input->heap_start = new_chunk_va;
1405 cs_iface->input->heap_end = new_chunk_va;
1406 panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
1407 panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
1408 panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1410 mutex_unlock(&sched->lock);
1412 /* We allocated a chunk, but couldn't link it to the heap
1413 * context because the group was scheduled out while we were
1414 * allocating memory. We need to return this chunk to the heap.
1416 if (unlikely(csg_id < 0 && new_chunk_va))
1417 panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
1422 panthor_heap_pool_put(heaps);
1426 static void group_tiler_oom_work(struct work_struct *work)
1428 struct panthor_group *group =
1429 container_of(work, struct panthor_group, tiler_oom_work);
1430 u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0);
1433 u32 cs_id = ffs(tiler_oom) - 1;
1435 group_process_tiler_oom(group, cs_id);
1436 tiler_oom &= ~BIT(cs_id);
1443 cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
1444 u32 csg_id, u32 cs_id)
1446 struct panthor_scheduler *sched = ptdev->scheduler;
1447 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1448 struct panthor_group *group = csg_slot->group;
1450 lockdep_assert_held(&sched->lock);
1452 if (drm_WARN_ON(&ptdev->base, !group))
1455 atomic_or(BIT(cs_id), &group->tiler_oom);
1457 /* We don't use group_queue_work() here because we want to queue the
1458 * work item to the heap_alloc_wq.
1461 if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
1465 static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
1466 u32 csg_id, u32 cs_id)
1468 struct panthor_fw_cs_iface *cs_iface;
1469 u32 req, ack, events;
1471 lockdep_assert_held(&ptdev->scheduler->lock);
1473 cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1474 req = cs_iface->input->req;
1475 ack = cs_iface->output->ack;
1476 events = (req ^ ack) & CS_EVT_MASK;
1478 if (events & CS_FATAL)
1479 cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
1481 if (events & CS_FAULT)
1482 cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
1484 if (events & CS_TILER_OOM)
1485 cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
1487 /* We don't acknowledge the TILER_OOM event since its handling is
1488 * deferred to a separate work.
1490 panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
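/* Tell the caller (sched_process_csg_irq_locked()) whether the CS doorbell
 * for this slot should be rung once event processing is done.
 */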
1492 return (events & (CS_FAULT | CS_TILER_OOM)) != 0;
1495 static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
1497 struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1498 struct panthor_fw_csg_iface *csg_iface;
1500 lockdep_assert_held(&ptdev->scheduler->lock);
1502 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1503 csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE;
1506 static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
1508 struct panthor_scheduler *sched = ptdev->scheduler;
1510 lockdep_assert_held(&sched->lock);
1512 sched->might_have_idle_groups = true;
1514 /* Schedule a tick so we can evict idle groups and schedule non-idle
1515 * ones. This will also update runtime PM and devfreq busy/idle states,
1516 * so the device can lower its frequency or get suspended.
1518 sched_queue_delayed_work(sched, tick, 0);
1521 static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
1524 struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1525 struct panthor_group *group = csg_slot->group;
1527 lockdep_assert_held(&ptdev->scheduler->lock);
1530 group_queue_work(group, sync_upd);
1532 sched_queue_work(ptdev->scheduler, sync_upd);
1536 csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
1538 struct panthor_scheduler *sched = ptdev->scheduler;
1539 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1540 struct panthor_group *group = csg_slot->group;
1542 lockdep_assert_held(&sched->lock);
1544 drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
1546 group = csg_slot->group;
1547 if (!drm_WARN_ON(&ptdev->base, !group))
1548 group->timedout = true;
1550 sched_queue_delayed_work(sched, tick, 0);
1553 static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
1555 u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
1556 struct panthor_fw_csg_iface *csg_iface;
1557 u32 ring_cs_db_mask = 0;
1559 lockdep_assert_held(&ptdev->scheduler->lock);
1561 if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1564 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1565 req = READ_ONCE(csg_iface->input->req);
1566 ack = READ_ONCE(csg_iface->output->ack);
1567 cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
1568 cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
1569 csg_events = (req ^ ack) & CSG_EVT_MASK;
1571 /* There may not be any pending CSG/CS interrupts to process */
1572 if (req == ack && cs_irq_req == cs_irq_ack)
1575 /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
1576 * examining the CS_ACK & CS_REQ bits. This would ensure that Host
1577 * doesn't miss an interrupt for the CS in the race scenario where
1578 * whilst Host is servicing an interrupt for the CS, firmware sends
1579 * another interrupt for that CS.
1581 csg_iface->input->cs_irq_ack = cs_irq_req;
1583 panthor_fw_update_reqs(csg_iface, req, ack,
1586 CSG_PROGRESS_TIMER_EVENT);
1588 if (csg_events & CSG_IDLE)
1589 csg_slot_process_idle_event_locked(ptdev, csg_id);
1591 if (csg_events & CSG_PROGRESS_TIMER_EVENT)
1592 csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
1594 cs_irqs = cs_irq_req ^ cs_irq_ack;
1596 u32 cs_id = ffs(cs_irqs) - 1;
1598 if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
1599 ring_cs_db_mask |= BIT(cs_id);
1601 cs_irqs &= ~BIT(cs_id);
1604 if (csg_events & CSG_SYNC_UPDATE)
1605 csg_slot_sync_update_locked(ptdev, csg_id);
1607 if (ring_cs_db_mask)
1608 panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
1610 panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1613 static void sched_process_idle_event_locked(struct panthor_device *ptdev)
1615 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1617 lockdep_assert_held(&ptdev->scheduler->lock);
1619 /* Acknowledge the idle event and schedule a tick. */
1620 panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
1621 sched_queue_delayed_work(ptdev->scheduler, tick, 0);
1625 * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
1628 static void sched_process_global_irq_locked(struct panthor_device *ptdev)
1630 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1633 lockdep_assert_held(&ptdev->scheduler->lock);
1635 req = READ_ONCE(glb_iface->input->req);
1636 ack = READ_ONCE(glb_iface->output->ack);
1637 evts = (req ^ ack) & GLB_EVT_MASK;
1639 if (evts & GLB_IDLE)
1640 sched_process_idle_event_locked(ptdev);
1643 static void process_fw_events_work(struct work_struct *work)
1645 struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
1647 u32 events = atomic_xchg(&sched->fw_events, 0);
1648 struct panthor_device *ptdev = sched->ptdev;
1650 mutex_lock(&sched->lock);
1652 if (events & JOB_INT_GLOBAL_IF) {
1653 sched_process_global_irq_locked(ptdev);
1654 events &= ~JOB_INT_GLOBAL_IF;
1658 u32 csg_id = ffs(events) - 1;
1660 sched_process_csg_irq_locked(ptdev, csg_id);
1661 events &= ~BIT(csg_id);
1664 mutex_unlock(&sched->lock);
1668 * panthor_sched_report_fw_events() - Report FW events to the scheduler.
1670 void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
1672 if (!ptdev->scheduler)
1675 atomic_or(events, &ptdev->scheduler->fw_events);
1676 sched_queue_work(ptdev->scheduler, fw_events);
1679 static const char *fence_get_driver_name(struct dma_fence *fence)
1684 static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
1686 return "queue-fence";
1689 static const struct dma_fence_ops panthor_queue_fence_ops = {
1690 .get_driver_name = fence_get_driver_name,
1691 .get_timeline_name = queue_fence_get_timeline_name,
1694 struct panthor_csg_slots_upd_ctx {
1700 } requests[MAX_CSGS];
1703 static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
1705 memset(ctx, 0, sizeof(*ctx));
1708 static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
1709 struct panthor_csg_slots_upd_ctx *ctx,
1710 u32 csg_id, u32 value, u32 mask)
1712 if (drm_WARN_ON(&ptdev->base, !mask) ||
1713 drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1716 ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask);
1717 ctx->requests[csg_id].mask |= mask;
1718 ctx->update_mask |= BIT(csg_id);
1721 static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
1722 struct panthor_csg_slots_upd_ctx *ctx)
1724 struct panthor_scheduler *sched = ptdev->scheduler;
1725 u32 update_slots = ctx->update_mask;
1727 lockdep_assert_held(&sched->lock);
1729 if (!ctx->update_mask)
1732 while (update_slots) {
1733 struct panthor_fw_csg_iface *csg_iface;
1734 u32 csg_id = ffs(update_slots) - 1;
1736 update_slots &= ~BIT(csg_id);
1737 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1738 panthor_fw_update_reqs(csg_iface, req,
1739 ctx->requests[csg_id].value,
1740 ctx->requests[csg_id].mask);
1743 panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);
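/* All requests have been queued and the doorbells rung in one go; now wait
 * for each updated CSG slot to acknowledge.
 */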
1745 update_slots = ctx->update_mask;
1746 while (update_slots) {
1747 struct panthor_fw_csg_iface *csg_iface;
1748 u32 csg_id = ffs(update_slots) - 1;
1749 u32 req_mask = ctx->requests[csg_id].mask, acked;
1752 update_slots &= ~BIT(csg_id);
1753 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1755 ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);
1757 if (acked & CSG_ENDPOINT_CONFIG)
1758 csg_slot_sync_priority_locked(ptdev, csg_id);
1760 if (acked & CSG_STATE_MASK)
1761 csg_slot_sync_state_locked(ptdev, csg_id);
1763 if (acked & CSG_STATUS_UPDATE) {
1764 csg_slot_sync_queues_state_locked(ptdev, csg_id);
1765 csg_slot_sync_idle_state_locked(ptdev, csg_id);
1768 if (ret && acked != req_mask &&
1769 ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
1770 drm_err(&ptdev->base, "CSG %d update request timed out", csg_id);
1771 ctx->timedout_mask |= BIT(csg_id);
1775 if (ctx->timedout_mask)
1781 struct panthor_sched_tick_ctx {
1782 struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
1783 struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
1784 u32 idle_group_count;
1786 enum panthor_csg_priority min_priority;
1787 struct panthor_vm *vms[MAX_CS_PER_CSG];
1789 bool immediate_tick;
1790 u32 csg_upd_failed_mask;
1794 tick_ctx_is_full(const struct panthor_scheduler *sched,
1795 const struct panthor_sched_tick_ctx *ctx)
1797 return ctx->group_count == sched->csg_slot_count;
1801 group_is_idle(struct panthor_group *group)
1803 struct panthor_device *ptdev = group->ptdev;
1804 u32 inactive_queues;
1806 if (group->csg_id >= 0)
1807 return ptdev->scheduler->csg_slots[group->csg_id].idle;
1809 inactive_queues = group->idle_queues | group->blocked_queues;
1810 return hweight32(inactive_queues) == group->queue_count;
1814 group_can_run(struct panthor_group *group)
1816 return group->state != PANTHOR_CS_GROUP_TERMINATED &&
1817 group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
1818 !group->destroyed && group->fatal_queues == 0 &&
1823 tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
1824 struct panthor_sched_tick_ctx *ctx,
1825 struct list_head *queue,
1826 bool skip_idle_groups,
1827 bool owned_by_tick_ctx)
1829 struct panthor_group *group, *tmp;
1831 if (tick_ctx_is_full(sched, ctx))
1834 list_for_each_entry_safe(group, tmp, queue, run_node) {
1837 if (!group_can_run(group))
1840 if (skip_idle_groups && group_is_idle(group))
1843 for (i = 0; i < ctx->as_count; i++) {
1844 if (ctx->vms[i] == group->vm)
1848 if (i == ctx->as_count && ctx->as_count == sched->as_slot_count)
1851 if (!owned_by_tick_ctx)
1854 list_move_tail(&group->run_node, &ctx->groups[group->priority]);
1856 if (group_is_idle(group))
1857 ctx->idle_group_count++;
1859 if (i == ctx->as_count)
1860 ctx->vms[ctx->as_count++] = group->vm;
1862 if (ctx->min_priority > group->priority)
1863 ctx->min_priority = group->priority;
1865 if (tick_ctx_is_full(sched, ctx))
1871 tick_ctx_insert_old_group(struct panthor_scheduler *sched,
1872 struct panthor_sched_tick_ctx *ctx,
1873 struct panthor_group *group,
1876 struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
1877 struct panthor_group *other_group;
1880 list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1884 /* Rotate to make sure groups with lower CSG slot
1885 * priorities have a chance to get a higher CSG slot
1886 * priority next time they get picked. This priority
1887 * has an impact on resource request ordering, so it's
1888 * important to make sure we don't let one group starve
1889 * all other groups with the same group priority.
1891 list_for_each_entry(other_group,
1892 &ctx->old_groups[csg_slot->group->priority],
1894 struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
1896 if (other_csg_slot->priority > csg_slot->priority) {
1897 list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
1902 list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1906 tick_ctx_init(struct panthor_scheduler *sched,
1907 struct panthor_sched_tick_ctx *ctx,
1910 struct panthor_device *ptdev = sched->ptdev;
1911 struct panthor_csg_slots_upd_ctx upd_ctx;
1915 memset(ctx, 0, sizeof(*ctx));
1916 csgs_upd_ctx_init(&upd_ctx);
1918 ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
1919 for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
1920 INIT_LIST_HEAD(&ctx->groups[i]);
1921 INIT_LIST_HEAD(&ctx->old_groups[i]);
1924 for (i = 0; i < sched->csg_slot_count; i++) {
1925 struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
1926 struct panthor_group *group = csg_slot->group;
1927 struct panthor_fw_csg_iface *csg_iface;
1932 csg_iface = panthor_fw_get_csg_iface(ptdev, i);
1935 /* If there were unhandled faults on the VM, force processing of
1936 * CSG IRQs, so we can flag the faulty queue.
1938 if (panthor_vm_has_unhandled_faults(group->vm)) {
1939 sched_process_csg_irq_locked(ptdev, i);
1941 /* No fatal fault reported, flag all queues as faulty. */
1942 if (!group->fatal_queues)
1943 group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
1946 tick_ctx_insert_old_group(sched, ctx, group, full_tick);
1947 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
1948 csg_iface->output->ack ^ CSG_STATUS_UPDATE,
1952 ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
1954 panthor_device_schedule_reset(ptdev);
1955 ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
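/* Presumably the number of CS instructions the kernel emits per job slot in
 * the queue ring buffer, i.e. the kernel-built call/sync sequence mentioned
 * in the DOC block at the top of this file.
 */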
1959 #define NUM_INSTRS_PER_SLOT 16
1962 group_term_post_processing(struct panthor_group *group)
1964 struct panthor_job *job, *tmp;
1965 LIST_HEAD(faulty_jobs);
1969 if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
1972 cookie = dma_fence_begin_signalling();
1973 for (i = 0; i < group->queue_count; i++) {
1974 struct panthor_queue *queue = group->queues[i];
1975 struct panthor_syncobj_64b *syncobj;
1978 if (group->fatal_queues & BIT(i))
1980 else if (group->timedout)
1988 spin_lock(&queue->fence_ctx.lock);
1989 list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
1990 list_move_tail(&job->node, &faulty_jobs);
1991 dma_fence_set_error(job->done_fence, err);
1992 dma_fence_signal_locked(job->done_fence);
1994 spin_unlock(&queue->fence_ctx.lock);
1996 /* Manually update the syncobj seqno to unblock waiters. */
1997 syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
1998 syncobj->status = ~0;
1999 syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
2000 sched_queue_work(group->ptdev->scheduler, sync_upd);
2002 dma_fence_end_signalling(cookie);
2004 list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
2005 list_del_init(&job->node);
2006 panthor_job_put(&job->base);
2010 static void group_term_work(struct work_struct *work)
2012 struct panthor_group *group =
2013 container_of(work, struct panthor_group, term_work);
2015 group_term_post_processing(group);
2020 tick_ctx_cleanup(struct panthor_scheduler *sched,
2021 struct panthor_sched_tick_ctx *ctx)
2023 struct panthor_group *group, *tmp;
2026 for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
2027 list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
2028 /* If everything went fine, we should only have groups
2029 * to be terminated in the old_groups lists.
2031 drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
2032 group_can_run(group));
2034 if (!group_can_run(group)) {
2035 list_del_init(&group->run_node);
2036 list_del_init(&group->wait_node);
2037 group_queue_work(group, term);
2038 } else if (group->csg_id >= 0) {
2039 list_del_init(&group->run_node);
2041 list_move(&group->run_node,
2042 group_is_idle(group) ?
2043 &sched->groups.idle[group->priority] :
2044 &sched->groups.runnable[group->priority]);
2050 for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
2051 /* If everything went fine, the groups to schedule lists should be empty. */
2054 drm_WARN_ON(&group->ptdev->base,
2055 !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
2057 list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
2058 if (group->csg_id >= 0) {
2059 list_del_init(&group->run_node);
2061 list_move(&group->run_node,
2062 group_is_idle(group) ?
2063 &sched->groups.idle[group->priority] :
2064 &sched->groups.runnable[group->priority]);
2072 tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
2074 struct panthor_group *group, *tmp;
2075 struct panthor_device *ptdev = sched->ptdev;
2076 struct panthor_csg_slot *csg_slot;
2077 int prio, new_csg_prio = MAX_CSG_PRIO, i;
2078 u32 free_csg_slots = 0;
2079 struct panthor_csg_slots_upd_ctx upd_ctx;
2082 csgs_upd_ctx_init(&upd_ctx);
2084 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2085 /* Suspend or terminate evicted groups. */
2086 list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2087 bool term = !group_can_run(group);
2088 int csg_id = group->csg_id;
2090 if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2093 csg_slot = &sched->csg_slots[csg_id];
2094 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2095 term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
2099 /* Update priorities on already running groups. */
2100 list_for_each_entry(group, &ctx->groups[prio], run_node) {
2101 struct panthor_fw_csg_iface *csg_iface;
2102 int csg_id = group->csg_id;
2109 csg_slot = &sched->csg_slots[csg_id];
2110 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2111 if (csg_slot->priority == new_csg_prio) {
2116 panthor_fw_update_reqs(csg_iface, endpoint_req,
2117 CSG_EP_REQ_PRIORITY(new_csg_prio),
2118 CSG_EP_REQ_PRIORITY_MASK);
2119 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2120 csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2121 CSG_ENDPOINT_CONFIG);
2126 ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2128 panthor_device_schedule_reset(ptdev);
2129 ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2133 /* Unbind evicted groups. */
2134 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2135 list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2136 /* This group is gone. Process interrupts to clear
2137 * any pending interrupts before we start the new group on this slot. */
2140 if (group->csg_id >= 0)
2141 sched_process_csg_irq_locked(ptdev, group->csg_id);
2143 group_unbind_locked(group);
2147 for (i = 0; i < sched->csg_slot_count; i++) {
2148 if (!sched->csg_slots[i].group)
2149 free_csg_slots |= BIT(i);
2152 csgs_upd_ctx_init(&upd_ctx);
2153 new_csg_prio = MAX_CSG_PRIO;
2155 /* Start new groups. */
2156 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2157 list_for_each_entry(group, &ctx->groups[prio], run_node) {
2158 int csg_id = group->csg_id;
2159 struct panthor_fw_csg_iface *csg_iface;
2166 csg_id = ffs(free_csg_slots) - 1;
2167 if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2170 csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2171 csg_slot = &sched->csg_slots[csg_id];
2172 group_bind_locked(group, csg_id);
2173 csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
2174 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2175 group->state == PANTHOR_CS_GROUP_SUSPENDED ?
2176 CSG_STATE_RESUME : CSG_STATE_START,
2178 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2179 csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2180 CSG_ENDPOINT_CONFIG);
2181 free_csg_slots &= ~BIT(csg_id);
2185 ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2187 panthor_device_schedule_reset(ptdev);
2188 ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2192 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2193 list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
2194 list_del_init(&group->run_node);
2196 /* If the group has been destroyed while we were
2197 * scheduling, ask for an immediate tick to
2198 * re-evaluate as soon as possible and get rid of
2199 * this dangling group.
2201 if (group->destroyed)
2202 ctx->immediate_tick = true;
2206 /* Return evicted groups to the idle or run queues. Groups
2207 * that can no longer be run (because they've been destroyed
2208 * or experienced an unrecoverable error) will be scheduled
2209 * for destruction in tick_ctx_cleanup().
2211 list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
2212 if (!group_can_run(group))
2215 if (group_is_idle(group))
2216 list_move_tail(&group->run_node, &sched->groups.idle[prio]);
2218 list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
2223 sched->used_csg_slot_count = ctx->group_count;
2224 sched->might_have_idle_groups = ctx->idle_group_count > 0;
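/* Compute the delay before the next tick. Returns the delay in jiffies,
 * or U64_MAX when no periodic tick is needed (free slots left, or only
 * idle groups scheduled).
 */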
2228 tick_ctx_update_resched_target(struct panthor_scheduler *sched,
2229 const struct panthor_sched_tick_ctx *ctx)
2231 /* We had space left, no need to reschedule until some external event happens. */
2232 if (!tick_ctx_is_full(sched, ctx))
2235 /* If idle groups were scheduled, no need to wake up until some external
2236 * event happens (group unblocked, new job submitted, ...).
2238 if (ctx->idle_group_count)
2241 if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
2244 /* If there are groups of the same priority waiting, we need to
2245 * keep the scheduler ticking; otherwise, we'll just wait for
2246 * new groups with higher priority to be queued.
2248 if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
2249 u64 resched_target = sched->last_tick + sched->tick_period;
2251 if (time_before64(sched->resched_target, sched->last_tick) ||
2252 time_before64(resched_target, sched->resched_target))
2253 sched->resched_target = resched_target;
2255 return sched->resched_target - sched->last_tick;
2259 sched->resched_target = U64_MAX;
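/* Scheduler tick: re-evaluate which groups should own a CSG slot, apply the
 * new selection, and re-arm the delayed work if another tick is needed.
 */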
2263 static void tick_work(struct work_struct *work)
2265 struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
2267 struct panthor_device *ptdev = sched->ptdev;
2268 struct panthor_sched_tick_ctx ctx;
2269 u64 remaining_jiffies = 0, resched_delay;
2270 u64 now = get_jiffies_64();
2271 int prio, ret, cookie;
2273 if (!drm_dev_enter(&ptdev->base, &cookie))
2276 ret = pm_runtime_resume_and_get(ptdev->base.dev);
2277 if (drm_WARN_ON(&ptdev->base, ret))
2280 if (time_before64(now, sched->resched_target))
2281 remaining_jiffies = sched->resched_target - now;
2283 mutex_lock(&sched->lock);
2284 if (panthor_device_reset_is_pending(sched->ptdev))
2287 tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
2288 if (ctx.csg_upd_failed_mask)
2289 goto out_cleanup_ctx;
2291 if (remaining_jiffies) {
2292 /* Scheduling forced in the middle of a tick. Only RT groups
2293 * can preempt non-RT ones. Currently running RT groups can't be preempted. */
2296 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2297 prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2299 tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
2301 if (prio == PANTHOR_CSG_PRIORITY_RT) {
2302 tick_ctx_pick_groups_from_list(sched, &ctx,
2303 &sched->groups.runnable[prio],
2309 /* First pick non-idle groups */
2310 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2311 prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2313 tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
2315 tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
2318 /* If we have free CSG slots left, pick idle groups */
2319 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2320 prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2322 /* Check the old_group queue first to avoid reprogramming the slots */
2323 tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
2324 tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
2328 tick_ctx_apply(sched, &ctx);
2329 if (ctx.csg_upd_failed_mask)
2330 goto out_cleanup_ctx;
2332 if (ctx.idle_group_count == ctx.group_count) {
2333 panthor_devfreq_record_idle(sched->ptdev);
2334 if (sched->pm.has_ref) {
2335 pm_runtime_put_autosuspend(ptdev->base.dev);
2336 sched->pm.has_ref = false;
2339 panthor_devfreq_record_busy(sched->ptdev);
2340 if (!sched->pm.has_ref) {
2341 pm_runtime_get(ptdev->base.dev);
2342 sched->pm.has_ref = true;
2346 sched->last_tick = now;
2347 resched_delay = tick_ctx_update_resched_target(sched, &ctx);
2348 if (ctx.immediate_tick)
2351 if (resched_delay != U64_MAX)
2352 sched_queue_delayed_work(sched, tick, resched_delay);
2355 tick_ctx_cleanup(sched, &ctx);
2358 mutex_unlock(&sched->lock);
2359 pm_runtime_mark_last_busy(ptdev->base.dev);
2360 pm_runtime_put_autosuspend(ptdev->base.dev);
2363 drm_dev_exit(cookie);
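/* Re-evaluate the SYNC_WAIT condition blocking a queue by reading the
 * synchronization object it waits on. A positive return value means the
 * wait condition is met, zero means it is not, negative values are errors.
 */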
2366 static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
2368 struct panthor_queue *queue = group->queues[queue_idx];
2370 struct panthor_syncobj_64b sync64;
2371 struct panthor_syncobj_32b sync32;
2376 syncobj = panthor_queue_get_syncwait_obj(group, queue);
2380 value = queue->syncwait.sync64 ?
2381 syncobj->sync64.seqno :
2382 syncobj->sync32.seqno;
2384 if (queue->syncwait.gt)
2385 result = value > queue->syncwait.ref;
2387 result = value <= queue->syncwait.ref;
2390 panthor_queue_put_syncwait_obj(queue);
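/* Walk the waiting groups and unblock the queues whose sync wait condition
 * is now met. Unblocked groups that don't hold a CSG slot are moved back to
 * the runnable list, and an immediate tick is requested when an RT group
 * gets unblocked.
 */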
2395 static void sync_upd_work(struct work_struct *work)
2397 struct panthor_scheduler *sched = container_of(work,
2398 struct panthor_scheduler,
2400 struct panthor_group *group, *tmp;
2401 bool immediate_tick = false;
2403 mutex_lock(&sched->lock);
2404 list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) {
2405 u32 tested_queues = group->blocked_queues;
2406 u32 unblocked_queues = 0;
2408 while (tested_queues) {
2409 u32 cs_id = ffs(tested_queues) - 1;
2412 ret = panthor_queue_eval_syncwait(group, cs_id);
2413 drm_WARN_ON(&group->ptdev->base, ret < 0);
2415 unblocked_queues |= BIT(cs_id);
2417 tested_queues &= ~BIT(cs_id);
2420 if (unblocked_queues) {
2421 group->blocked_queues &= ~unblocked_queues;
2423 if (group->csg_id < 0) {
2424 list_move(&group->run_node,
2425 &sched->groups.runnable[group->priority]);
2426 if (group->priority == PANTHOR_CSG_PRIORITY_RT)
2427 immediate_tick = true;
2431 if (!group->blocked_queues)
2432 list_del_init(&group->wait_node);
2434 mutex_unlock(&sched->lock);
2437 sched_queue_delayed_work(sched, tick, 0);
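/* Called when new jobs land on a group's queues: clear the idle bits for
 * the updated queues and kick the scheduler tick if the group may need a
 * CSG slot sooner than the next periodic tick.
 */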
2440 static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
2442 struct panthor_device *ptdev = group->ptdev;
2443 struct panthor_scheduler *sched = ptdev->scheduler;
2444 struct list_head *queue = &sched->groups.runnable[group->priority];
2445 u64 delay_jiffies = 0;
2449 if (!group_can_run(group))
2452 /* All updated queues are blocked, no need to wake up the scheduler. */
2453 if ((queue_mask & group->blocked_queues) == queue_mask)
2456 was_idle = group_is_idle(group);
2457 group->idle_queues &= ~queue_mask;
2459 /* Don't mess up with the lists if we're in the middle of a reset. */
2460 if (atomic_read(&sched->reset.in_progress))
2463 if (was_idle && !group_is_idle(group))
2464 list_move_tail(&group->run_node, queue);
2466 /* RT groups are preemptive. */
2467 if (group->priority == PANTHOR_CSG_PRIORITY_RT) {
2468 sched_queue_delayed_work(sched, tick, 0);
2472 /* Some groups might be idle, force an immediate tick to re-evaluate. */
2475 if (sched->might_have_idle_groups) {
2476 sched_queue_delayed_work(sched, tick, 0);
2480 /* Scheduler is ticking, nothing to do. */
2481 if (sched->resched_target != U64_MAX) {
2482 /* If there are free slots, force immediate ticking. */
2483 if (sched->used_csg_slot_count < sched->csg_slot_count)
2484 sched_queue_delayed_work(sched, tick, 0);
2489 /* Scheduler tick was off, recalculate the resched_target based on the
2490 * last tick event, and queue the scheduler work.
2492 now = get_jiffies_64();
2493 sched->resched_target = sched->last_tick + sched->tick_period;
2494 if (sched->used_csg_slot_count == sched->csg_slot_count &&
2495 time_before64(now, sched->resched_target))
2496 delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
2498 sched_queue_delayed_work(sched, tick, delay_jiffies);
2501 static void queue_stop(struct panthor_queue *queue,
2502 struct panthor_job *bad_job)
2504 drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
2507 static void queue_start(struct panthor_queue *queue)
2509 struct panthor_job *job;
2511 /* Re-assign the parent fences. */
2512 list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
2513 job->base.s_fence->parent = dma_fence_get(job->done_fence);
2515 drm_sched_start(&queue->scheduler, true);
2518 static void panthor_group_stop(struct panthor_group *group)
2520 struct panthor_scheduler *sched = group->ptdev->scheduler;
2522 lockdep_assert_held(&sched->reset.lock);
2524 for (u32 i = 0; i < group->queue_count; i++)
2525 queue_stop(group->queues[i], NULL);
2528 list_move_tail(&group->run_node, &sched->reset.stopped_groups);
2531 static void panthor_group_start(struct panthor_group *group)
2533 struct panthor_scheduler *sched = group->ptdev->scheduler;
2535 lockdep_assert_held(&group->ptdev->scheduler->reset.lock);
2537 for (u32 i = 0; i < group->queue_count; i++)
2538 queue_start(group->queues[i]);
2540 if (group_can_run(group)) {
2541 list_move_tail(&group->run_node,
2542 group_is_idle(group) ?
2543 &sched->groups.idle[group->priority] :
2544 &sched->groups.runnable[group->priority]);
2546 list_del_init(&group->run_node);
2547 list_del_init(&group->wait_node);
2548 group_queue_work(group, term);
2554 static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
2556 struct panthor_scheduler *sched = ptdev->scheduler;
2558 sched_queue_delayed_work(sched, tick, 0);
2562 * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
2564 void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
2566 /* Force a tick to immediately kill faulty groups. */
2567 if (ptdev->scheduler)
2568 panthor_sched_immediate_tick(ptdev);
2571 void panthor_sched_resume(struct panthor_device *ptdev)
2573 /* Force a tick to re-evaluate after a resume. */
2574 panthor_sched_immediate_tick(ptdev);
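/* Suspend all groups currently bound to a CSG slot: faulty groups are
 * terminated instead, timed out suspends are escalated to termination,
 * caches are flushed so the suspend buffers are coherent, and every group
 * is unbound from its slot before returning.
 */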
2577 void panthor_sched_suspend(struct panthor_device *ptdev)
2579 struct panthor_scheduler *sched = ptdev->scheduler;
2580 struct panthor_csg_slots_upd_ctx upd_ctx;
2581 struct panthor_group *group;
2582 u32 suspended_slots;
2585 mutex_lock(&sched->lock);
2586 csgs_upd_ctx_init(&upd_ctx);
2587 for (i = 0; i < sched->csg_slot_count; i++) {
2588 struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2590 if (csg_slot->group) {
2591 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2592 group_can_run(csg_slot->group) ?
2593 CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
2598 suspended_slots = upd_ctx.update_mask;
2600 csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2601 suspended_slots &= ~upd_ctx.timedout_mask;
2603 if (upd_ctx.timedout_mask) {
2604 u32 slot_mask = upd_ctx.timedout_mask;
2606 drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
2607 csgs_upd_ctx_init(&upd_ctx);
2609 u32 csg_id = ffs(slot_mask) - 1;
2611 csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2612 CSG_STATE_TERMINATE,
2614 slot_mask &= ~BIT(csg_id);
2617 csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2619 slot_mask = upd_ctx.timedout_mask;
2621 u32 csg_id = ffs(slot_mask) - 1;
2622 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2624 /* The terminate command timed out, but the soft-reset will
2625 * automatically terminate all active groups, so let's
2626 * force the state to terminated here.
2628 if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
2629 csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2630 slot_mask &= ~BIT(csg_id);
2634 /* Flush L2 and LSC caches to make sure suspend state is up-to-date.
2635 * If the flush fails, flag all queues for termination.
2637 if (suspended_slots) {
2638 bool flush_caches_failed = false;
2639 u32 slot_mask = suspended_slots;
2641 if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
2642 flush_caches_failed = true;
2645 u32 csg_id = ffs(slot_mask) - 1;
2646 struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2648 if (flush_caches_failed)
2649 csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2651 csg_slot_sync_update_locked(ptdev, csg_id);
2653 slot_mask &= ~BIT(csg_id);
2657 for (i = 0; i < sched->csg_slot_count; i++) {
2658 struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2660 group = csg_slot->group;
2666 if (group->csg_id >= 0)
2667 sched_process_csg_irq_locked(ptdev, group->csg_id);
2669 group_unbind_locked(group);
2671 drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
2673 if (group_can_run(group)) {
2674 list_add(&group->run_node,
2675 &sched->groups.idle[group->priority]);
2677 /* We don't bother stopping the scheduler if the group is
2678 * faulty, the group termination work will finish the job.
2680 list_del_init(&group->wait_node);
2681 group_queue_work(group, term);
2685 mutex_unlock(&sched->lock);
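/* Prepare for a GPU reset: cancel the scheduler works, suspend all groups,
 * and park the groups that can still run on the reset.stopped_groups list
 * so panthor_sched_post_reset() can restart them.
 */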
2688 void panthor_sched_pre_reset(struct panthor_device *ptdev)
2690 struct panthor_scheduler *sched = ptdev->scheduler;
2691 struct panthor_group *group, *group_tmp;
2694 mutex_lock(&sched->reset.lock);
2695 atomic_set(&sched->reset.in_progress, true);
2697 /* Cancel all scheduler works. Once this is done, these works can't be
2698 * scheduled again until the reset operation is complete.
2700 cancel_work_sync(&sched->sync_upd_work);
2701 cancel_delayed_work_sync(&sched->tick_work);
2703 panthor_sched_suspend(ptdev);
2705 /* Stop all groups that might still accept jobs, so we don't get passed
2706 * new jobs while we're resetting.
2708 for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) {
2709 /* All groups should be in the idle lists. */
2710 drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i]));
2711 list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node)
2712 panthor_group_stop(group);
2715 for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) {
2716 list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node)
2717 panthor_group_stop(group);
2720 mutex_unlock(&sched->reset.lock);
2723 void panthor_sched_post_reset(struct panthor_device *ptdev)
2725 struct panthor_scheduler *sched = ptdev->scheduler;
2726 struct panthor_group *group, *group_tmp;
2728 mutex_lock(&sched->reset.lock);
2730 list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
2731 panthor_group_start(group);
2733 /* We're done resetting the GPU, clear the reset.in_progress bit so we can
2734 * kick the scheduler.
2736 atomic_set(&sched->reset.in_progress, false);
2737 mutex_unlock(&sched->reset.lock);
2739 sched_queue_delayed_work(sched, tick, 0);
2741 sched_queue_work(sched, sync_upd);
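/* Per-group sync update work: signal the done fence of every in-flight job
 * whose syncobj seqno has been reached by the GPU.
 */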
2744 static void group_sync_upd_work(struct work_struct *work)
2746 struct panthor_group *group =
2747 container_of(work, struct panthor_group, sync_upd_work);
2748 struct panthor_job *job, *job_tmp;
2749 LIST_HEAD(done_jobs);
2753 cookie = dma_fence_begin_signalling();
2754 for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
2755 struct panthor_queue *queue = group->queues[queue_idx];
2756 struct panthor_syncobj_64b *syncobj;
2761 syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
2763 spin_lock(&queue->fence_ctx.lock);
2764 list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
2765 if (!job->call_info.size)
2768 if (syncobj->seqno < job->done_fence->seqno)
2771 list_move_tail(&job->node, &done_jobs);
2772 dma_fence_signal_locked(job->done_fence);
2774 spin_unlock(&queue->fence_ctx.lock);
2776 dma_fence_end_signalling(cookie);
2778 list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
2779 list_del_init(&job->node);
2780 panthor_job_put(&job->base);
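/* drm_sched run_job() hook: copy a fixed instruction sequence to the queue
 * ring buffer that flushes caches, CALLs the user command stream and bumps
 * the per-queue syncobj, then ring the doorbell (or schedule the group if
 * it doesn't own a CSG slot yet).
 */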
2786 static struct dma_fence *
2787 queue_run_job(struct drm_sched_job *sched_job)
2789 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2790 struct panthor_group *group = job->group;
2791 struct panthor_queue *queue = group->queues[job->queue_idx];
2792 struct panthor_device *ptdev = group->ptdev;
2793 struct panthor_scheduler *sched = ptdev->scheduler;
2794 u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
2795 u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
2796 u64 addr_reg = ptdev->csif_info.cs_reg_count -
2797 ptdev->csif_info.unpreserved_cs_reg_count;
2798 u64 val_reg = addr_reg + 2;
2799 u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
2800 job->queue_idx * sizeof(struct panthor_syncobj_64b);
2801 u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
2802 struct dma_fence *done_fence;
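/* Each command stream instruction is 64 bits wide, with the opcode in bits
 * [63:56] (see the shifts below); the remaining bits encode register
 * indices and immediates, as described in the per-instruction comments.
 */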
2805 u64 call_instrs[NUM_INSTRS_PER_SLOT] = {
2806 /* MOV32 rX+2, cs.latest_flush */
2807 (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush,
2809 /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
2810 (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
2812 /* MOV48 rX:rX+1, cs.start */
2813 (1ull << 56) | (addr_reg << 48) | job->call_info.start,
2815 /* MOV32 rX+2, cs.size */
2816 (2ull << 56) | (val_reg << 48) | job->call_info.size,
2818 /* WAIT(0) => waits for FLUSH_CACHE2 instruction */
2819 (3ull << 56) | (1 << 16),
2821 /* CALL rX:rX+1, rX+2 */
2822 (32ull << 56) | (addr_reg << 40) | (val_reg << 32),
2824 /* MOV48 rX:rX+1, sync_addr */
2825 (1ull << 56) | (addr_reg << 48) | sync_addr,
2827 /* MOV48 rX+2, #1 */
2828 (1ull << 56) | (val_reg << 48) | 1,
2831 (3ull << 56) | (waitall_mask << 16),
2833 /* SYNC_ADD64.system_scope.propagate_err.nowait rX:rX+1, rX+2 */
2834 (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1,
2836 /* ERROR_BARRIER, so we can recover from faults at job boundaries. */
2842 /* Need to be cacheline aligned to please the prefetcher. */
2843 static_assert(sizeof(call_instrs) % 64 == 0,
2844 "call_instrs is not aligned on a cacheline");
2846 /* Stream size is zero, nothing to do => return a NULL fence and let
2847 * drm_sched signal the parent.
2849 if (!job->call_info.size)
2852 ret = pm_runtime_resume_and_get(ptdev->base.dev);
2853 if (drm_WARN_ON(&ptdev->base, ret))
2854 return ERR_PTR(ret);
2856 mutex_lock(&sched->lock);
2857 if (!group_can_run(group)) {
2858 done_fence = ERR_PTR(-ECANCELED);
2862 dma_fence_init(job->done_fence,
2863 &panthor_queue_fence_ops,
2864 &queue->fence_ctx.lock,
2865 queue->fence_ctx.id,
2866 atomic64_inc_return(&queue->fence_ctx.seqno));
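/* ringbuf_size is a power of two, so masking the ever-increasing insert
 * pointer with (ringbuf_size - 1) gives the write offset inside the ring
 * buffer.
 */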
2868 memcpy(queue->ringbuf->kmap + ringbuf_insert,
2869 call_instrs, sizeof(call_instrs));
2871 panthor_job_get(&job->base);
2872 spin_lock(&queue->fence_ctx.lock);
2873 list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
2874 spin_unlock(&queue->fence_ctx.lock);
2876 job->ringbuf.start = queue->iface.input->insert;
2877 job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
2879 /* Make sure the ring buffer update is visible before the INSERT pointer is bumped. */
2884 queue->iface.input->extract = queue->iface.output->extract;
2885 queue->iface.input->insert = job->ringbuf.end;
2887 if (group->csg_id < 0) {
2888 /* If the queue is blocked, we want to keep the timeout running, so we
2889 * can detect unbounded waits and kill the group when that happens.
2890 * Otherwise, we suspend the timeout so the time we spend waiting for
2891 * a CSG slot is not counted.
2893 if (!(group->blocked_queues & BIT(job->queue_idx)) &&
2894 !queue->timeout_suspended) {
2895 queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
2896 queue->timeout_suspended = true;
2899 group_schedule_locked(group, BIT(job->queue_idx));
2901 gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
2902 if (!sched->pm.has_ref &&
2903 !(group->blocked_queues & BIT(job->queue_idx))) {
2904 pm_runtime_get(ptdev->base.dev);
2905 sched->pm.has_ref = true;
2909 done_fence = dma_fence_get(job->done_fence);
2912 mutex_unlock(&sched->lock);
2913 pm_runtime_mark_last_busy(ptdev->base.dev);
2914 pm_runtime_put_autosuspend(ptdev->base.dev);
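/* drm_sched timedout_job() hook: flag the group as timed out and either
 * force an immediate tick (if it owns a CSG slot) or terminate it directly.
 */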
2919 static enum drm_gpu_sched_stat
2920 queue_timedout_job(struct drm_sched_job *sched_job)
2922 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2923 struct panthor_group *group = job->group;
2924 struct panthor_device *ptdev = group->ptdev;
2925 struct panthor_scheduler *sched = ptdev->scheduler;
2926 struct panthor_queue *queue = group->queues[job->queue_idx];
2928 drm_warn(&ptdev->base, "job timeout\n");
2930 drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
2932 queue_stop(queue, job);
2934 mutex_lock(&sched->lock);
2935 group->timedout = true;
2936 if (group->csg_id >= 0) {
2937 sched_queue_delayed_work(ptdev->scheduler, tick, 0);
2939 /* Remove from the run queues, so the scheduler can't
2940 * pick the group on the next tick.
2942 list_del_init(&group->run_node);
2943 list_del_init(&group->wait_node);
2945 group_queue_work(group, term);
2947 mutex_unlock(&sched->lock);
2951 return DRM_GPU_SCHED_STAT_NOMINAL;
2954 static void queue_free_job(struct drm_sched_job *sched_job)
2956 drm_sched_job_cleanup(sched_job);
2957 panthor_job_put(sched_job);
2960 static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
2961 .run_job = queue_run_job,
2962 .timedout_job = queue_timedout_job,
2963 .free_job = queue_free_job,
2966 static struct panthor_queue *
2967 group_create_queue(struct panthor_group *group,
2968 const struct drm_panthor_queue_create *args)
2970 struct drm_gpu_scheduler *drm_sched;
2971 struct panthor_queue *queue;
2974 if (args->pad[0] || args->pad[1] || args->pad[2])
2975 return ERR_PTR(-EINVAL);
2977 if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
2978 !is_power_of_2(args->ringbuf_size))
2979 return ERR_PTR(-EINVAL);
2981 if (args->priority > CSF_MAX_QUEUE_PRIO)
2982 return ERR_PTR(-EINVAL);
2984 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
2986 return ERR_PTR(-ENOMEM);
2988 queue->fence_ctx.id = dma_fence_context_alloc(1);
2989 spin_lock_init(&queue->fence_ctx.lock);
2990 INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
2992 queue->priority = args->priority;
2994 queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
2996 DRM_PANTHOR_BO_NO_MMAP,
2997 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
2998 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
2999 PANTHOR_VM_KERNEL_AUTO_VA);
3000 if (IS_ERR(queue->ringbuf)) {
3001 ret = PTR_ERR(queue->ringbuf);
3002 goto err_free_queue;
3005 ret = panthor_kernel_bo_vmap(queue->ringbuf);
3007 goto err_free_queue;
3009 queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
3010 &queue->iface.input,
3011 &queue->iface.output,
3012 &queue->iface.input_fw_va,
3013 &queue->iface.output_fw_va);
3014 if (IS_ERR(queue->iface.mem)) {
3015 ret = PTR_ERR(queue->iface.mem);
3016 goto err_free_queue;
3019 ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
3020 group->ptdev->scheduler->wq, 1,
3021 args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
3022 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
3023 group->ptdev->reset.wq,
3024 NULL, "panthor-queue", group->ptdev->base.dev);
3026 goto err_free_queue;
3028 drm_sched = &queue->scheduler;
3029 ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
3034 group_free_queue(group, queue);
3035 return ERR_PTR(ret);
3038 #define MAX_GROUPS_PER_POOL 128
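/* Create a group and its queues from the uAPI arguments: validate the core
 * masks and priority, allocate the FW suspend buffers and per-queue
 * synchronization objects, then register the group in the file's group
 * pool.
 */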
3040 int panthor_group_create(struct panthor_file *pfile,
3041 const struct drm_panthor_group_create *group_args,
3042 const struct drm_panthor_queue_create *queue_args)
3044 struct panthor_device *ptdev = pfile->ptdev;
3045 struct panthor_group_pool *gpool = pfile->groups;
3046 struct panthor_scheduler *sched = ptdev->scheduler;
3047 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3048 struct panthor_group *group = NULL;
3049 u32 gid, i, suspend_size;
3052 if (group_args->pad)
3055 if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH)
3058 if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
3059 (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
3060 (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
3063 if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
3064 hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
3065 hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
3068 group = kzalloc(sizeof(*group), GFP_KERNEL);
3072 spin_lock_init(&group->fatal_lock);
3073 kref_init(&group->refcount);
3074 group->state = PANTHOR_CS_GROUP_CREATED;
3077 group->ptdev = ptdev;
3078 group->max_compute_cores = group_args->max_compute_cores;
3079 group->compute_core_mask = group_args->compute_core_mask;
3080 group->max_fragment_cores = group_args->max_fragment_cores;
3081 group->fragment_core_mask = group_args->fragment_core_mask;
3082 group->max_tiler_cores = group_args->max_tiler_cores;
3083 group->tiler_core_mask = group_args->tiler_core_mask;
3084 group->priority = group_args->priority;
3086 INIT_LIST_HEAD(&group->wait_node);
3087 INIT_LIST_HEAD(&group->run_node);
3088 INIT_WORK(&group->term_work, group_term_work);
3089 INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
3090 INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
3091 INIT_WORK(&group->release_work, group_release_work);
3093 group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
3099 suspend_size = csg_iface->control->suspend_size;
3100 group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3101 if (IS_ERR(group->suspend_buf)) {
3102 ret = PTR_ERR(group->suspend_buf);
3103 group->suspend_buf = NULL;
3107 suspend_size = csg_iface->control->protm_suspend_size;
3108 group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3109 if (IS_ERR(group->protm_suspend_buf)) {
3110 ret = PTR_ERR(group->protm_suspend_buf);
3111 group->protm_suspend_buf = NULL;
3115 group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
3116 group_args->queues.count *
3117 sizeof(struct panthor_syncobj_64b),
3118 DRM_PANTHOR_BO_NO_MMAP,
3119 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3120 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3121 PANTHOR_VM_KERNEL_AUTO_VA);
3122 if (IS_ERR(group->syncobjs)) {
3123 ret = PTR_ERR(group->syncobjs);
3127 ret = panthor_kernel_bo_vmap(group->syncobjs);
3131 memset(group->syncobjs->kmap, 0,
3132 group_args->queues.count * sizeof(struct panthor_syncobj_64b));
3134 for (i = 0; i < group_args->queues.count; i++) {
3135 group->queues[i] = group_create_queue(group, &queue_args[i]);
3136 if (IS_ERR(group->queues[i])) {
3137 ret = PTR_ERR(group->queues[i]);
3138 group->queues[i] = NULL;
3142 group->queue_count++;
3145 group->idle_queues = GENMASK(group->queue_count - 1, 0);
3147 ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
3151 mutex_lock(&sched->reset.lock);
3152 if (atomic_read(&sched->reset.in_progress)) {
3153 panthor_group_stop(group);
3155 mutex_lock(&sched->lock);
3156 list_add_tail(&group->run_node,
3157 &sched->groups.idle[group->priority]);
3158 mutex_unlock(&sched->lock);
3160 mutex_unlock(&sched->reset.lock);
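/* Destroy a group: flag it as destroyed, then either force an immediate
 * tick if it still owns a CSG slot, or queue its termination directly.
 */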
3169 int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
3171 struct panthor_group_pool *gpool = pfile->groups;
3172 struct panthor_device *ptdev = pfile->ptdev;
3173 struct panthor_scheduler *sched = ptdev->scheduler;
3174 struct panthor_group *group;
3176 group = xa_erase(&gpool->xa, group_handle);
3180 for (u32 i = 0; i < group->queue_count; i++) {
3181 if (group->queues[i])
3182 drm_sched_entity_destroy(&group->queues[i]->entity);
3185 mutex_lock(&sched->reset.lock);
3186 mutex_lock(&sched->lock);
3187 group->destroyed = true;
3188 if (group->csg_id >= 0) {
3189 sched_queue_delayed_work(sched, tick, 0);
3190 } else if (!atomic_read(&sched->reset.in_progress)) {
3191 /* Remove from the run queues, so the scheduler can't
3192 * pick the group on the next tick.
3194 list_del_init(&group->run_node);
3195 list_del_init(&group->wait_node);
3196 group_queue_work(group, term);
3198 mutex_unlock(&sched->lock);
3199 mutex_unlock(&sched->reset.lock);
3205 int panthor_group_get_state(struct panthor_file *pfile,
3206 struct drm_panthor_group_get_state *get_state)
3208 struct panthor_group_pool *gpool = pfile->groups;
3209 struct panthor_device *ptdev = pfile->ptdev;
3210 struct panthor_scheduler *sched = ptdev->scheduler;
3211 struct panthor_group *group;
3216 group = group_get(xa_load(&gpool->xa, get_state->group_handle));
3220 memset(get_state, 0, sizeof(*get_state));
3222 mutex_lock(&sched->lock);
3223 if (group->timedout)
3224 get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;
3225 if (group->fatal_queues) {
3226 get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
3227 get_state->fatal_queues = group->fatal_queues;
3229 mutex_unlock(&sched->lock);
3235 int panthor_group_pool_create(struct panthor_file *pfile)
3237 struct panthor_group_pool *gpool;
3239 gpool = kzalloc(sizeof(*gpool), GFP_KERNEL);
3243 xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
3244 pfile->groups = gpool;
3248 void panthor_group_pool_destroy(struct panthor_file *pfile)
3250 struct panthor_group_pool *gpool = pfile->groups;
3251 struct panthor_group *group;
3254 if (IS_ERR_OR_NULL(gpool))
3257 xa_for_each(&gpool->xa, i, group)
3258 panthor_group_destroy(pfile, i);
3260 xa_destroy(&gpool->xa);
3262 pfile->groups = NULL;
3265 static void job_release(struct kref *ref)
3267 struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
3269 drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));
3271 if (job->base.s_fence)
3272 drm_sched_job_cleanup(&job->base);
3274 if (job->done_fence && job->done_fence->ops)
3275 dma_fence_put(job->done_fence);
3277 dma_fence_free(job->done_fence);
3279 group_put(job->group);
3284 struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
3287 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3289 kref_get(&job->refcount);
3295 void panthor_job_put(struct drm_sched_job *sched_job)
3297 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3300 kref_put(&job->refcount, job_release);
3303 struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
3305 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3307 return job->group->vm;
3310 struct drm_sched_job *
3311 panthor_job_create(struct panthor_file *pfile,
3313 const struct drm_panthor_queue_submit *qsubmit)
3315 struct panthor_group_pool *gpool = pfile->groups;
3316 struct panthor_job *job;
3320 return ERR_PTR(-EINVAL);
3322 /* If stream_addr is zero, stream_size must be zero too, and vice versa. */
3323 if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
3324 return ERR_PTR(-EINVAL);
3326 /* Make sure the address is 64-byte (cacheline) aligned and the size
3327 * is 8-byte (instruction size) aligned.
3329 if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
3330 return ERR_PTR(-EINVAL);
3332 /* bits 24:30 must be zero. */
3333 if (qsubmit->latest_flush & GENMASK(30, 24))
3334 return ERR_PTR(-EINVAL);
3336 job = kzalloc(sizeof(*job), GFP_KERNEL);
3338 return ERR_PTR(-ENOMEM);
3340 kref_init(&job->refcount);
3341 job->queue_idx = qsubmit->queue_index;
3342 job->call_info.size = qsubmit->stream_size;
3343 job->call_info.start = qsubmit->stream_addr;
3344 job->call_info.latest_flush = qsubmit->latest_flush;
3345 INIT_LIST_HEAD(&job->node);
3347 job->group = group_get(xa_load(&gpool->xa, group_handle));
3353 if (job->queue_idx >= job->group->queue_count ||
3354 !job->group->queues[job->queue_idx]) {
3359 job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
3360 if (!job->done_fence) {
3365 ret = drm_sched_job_init(&job->base,
3366 &job->group->queues[job->queue_idx]->entity,
3374 panthor_job_put(&job->base);
3375 return ERR_PTR(ret);
3378 void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
3380 struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3382 /* Still not sure why we want USAGE_WRITE for external objects, since I
3383 * was assuming this would be handled through explicit syncs being imported
3384 * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
3385 * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
3387 panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
3388 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
3391 void panthor_sched_unplug(struct panthor_device *ptdev)
3393 struct panthor_scheduler *sched = ptdev->scheduler;
3395 cancel_delayed_work_sync(&sched->tick_work);
3397 mutex_lock(&sched->lock);
3398 if (sched->pm.has_ref) {
3399 pm_runtime_put(ptdev->base.dev);
3400 sched->pm.has_ref = false;
3402 mutex_unlock(&sched->lock);
3405 static void panthor_sched_fini(struct drm_device *ddev, void *res)
3407 struct panthor_scheduler *sched = res;
3410 if (!sched || !sched->csg_slot_count)
3413 cancel_delayed_work_sync(&sched->tick_work);
3416 destroy_workqueue(sched->wq);
3418 if (sched->heap_alloc_wq)
3419 destroy_workqueue(sched->heap_alloc_wq);
3421 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3422 drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
3423 drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
3426 drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
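/* Scheduler initialization: query the FW interfaces for the CSG/CS/
 * scoreboard slot counts, clamp the number of usable CSG slots, and
 * allocate the workqueues used for scheduling and heap chunk allocation.
 */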
3429 int panthor_sched_init(struct panthor_device *ptdev)
3431 struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
3432 struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3433 struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
3434 struct panthor_scheduler *sched;
3435 u32 gpu_as_count, num_groups;
3438 sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
3442 /* The highest bit in JOB_INT_* is reserved for global IRQs. That
3443 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
3445 num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
3447 /* The FW-side scheduler might deadlock if two groups with the same
3448 * priority try to access a set of resources that overlaps, with part
3449 * of the resources being allocated to one group and the other part to
3450 * the other group, both groups waiting for the remaining resources to
3451 * be allocated. To avoid that, it is recommended to assign each CSG a
3452 * different priority. In theory we could allow several groups to have
3453 * the same CSG priority if they don't request the same resources, but
3454 * that makes the scheduling logic more complicated, so let's clamp
3455 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
3457 num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
3459 /* We need at least one AS for the MCU and one for the GPU contexts. */
3460 gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
3461 if (!gpu_as_count) {
3462 drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
3467 sched->ptdev = ptdev;
3468 sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
3469 sched->csg_slot_count = num_groups;
3470 sched->cs_slot_count = csg_iface->control->stream_num;
3471 sched->as_slot_count = gpu_as_count;
3472 ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
3473 ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
3474 ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
3476 sched->last_tick = 0;
3477 sched->resched_target = U64_MAX;
3478 sched->tick_period = msecs_to_jiffies(10);
3479 INIT_DELAYED_WORK(&sched->tick_work, tick_work);
3480 INIT_WORK(&sched->sync_upd_work, sync_upd_work);
3481 INIT_WORK(&sched->fw_events_work, process_fw_events_work);
3483 ret = drmm_mutex_init(&ptdev->base, &sched->lock);
3487 for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3488 INIT_LIST_HEAD(&sched->groups.runnable[prio]);
3489 INIT_LIST_HEAD(&sched->groups.idle[prio]);
3491 INIT_LIST_HEAD(&sched->groups.waiting);
3493 ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
3497 INIT_LIST_HEAD(&sched->reset.stopped_groups);
3499 /* sched->heap_alloc_wq will be used for heap chunk allocation on
3500 * tiler OOM events, which means we can't use the same workqueue for
3501 * the scheduler because works queued by the scheduler are in
3502 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
3503 * work around this limitation.
3505 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
3506 * allocation path that we can call when a heap OOM is reported. The
3507 * FW is smart enough to fall back on other methods if the kernel can't
3508 * allocate memory, and fail the tiling job if none of these
3509 * countermeasures worked.
3511 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
3512 * system is running out of memory.
3514 sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
3515 sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
3516 if (!sched->wq || !sched->heap_alloc_wq) {
3517 panthor_sched_fini(&ptdev->base, sched);
3518 drm_err(&ptdev->base, "Failed to allocate the workqueues");
3522 ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
3526 ptdev->scheduler = sched;