drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2014 Intel Corporation
   4  */
   5
   6 #include <linux/circ_buf.h>
   7
   8 #include "gem/i915_gem_context.h"
   9 #include "gt/gen8_engine_cs.h"
  10 #include "gt/intel_breadcrumbs.h"
  11 #include "gt/intel_context.h"
  12 #include "gt/intel_engine_pm.h"
  13 #include "gt/intel_engine_heartbeat.h"
  14 #include "gt/intel_gt.h"
  15 #include "gt/intel_gt_irq.h"
  16 #include "gt/intel_gt_pm.h"
  17 #include "gt/intel_gt_requests.h"
  18 #include "gt/intel_lrc.h"
  19 #include "gt/intel_lrc_reg.h"
  20 #include "gt/intel_mocs.h"
  21 #include "gt/intel_ring.h"
  22
  23 #include "intel_guc_submission.h"
  24
  25 #include "i915_drv.h"
  26 #include "i915_trace.h"
  27
  28 /**
  29  * DOC: GuC-based command submission
  30  *
  31  * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
  32  * firmware is moving to an updated submission interface and we plan to
  33  * turn submission back on when that lands. The below documentation (and related
  34  * code) matches the old submission model and will be updated as part of the
  35  * upgrade to the new flow.
  36  *
  37  * GuC stage descriptor:
  38  * During initialization, the driver allocates a static pool of 1024 such
  39  * descriptors, and shares them with the GuC. Currently, we only use one
  40  * descriptor. This stage descriptor lets the GuC know about the workqueue and
  41  * process descriptor. Theoretically, it also lets the GuC know about our HW
  42  * contexts (context ID, etc...), but we actually employ a kind of submission
  43  * where the GuC uses the LRCA sent via the work item instead. This is called
  44  * a "proxy" submission.
  45  *
  46  * The Scratch registers:
  47  * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
  48  * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
  49  * It then triggers an interrupt on the GuC via another register write
  50  * (0xC4C8). Firmware writes a success/fail code back to the action register
  51  * after it processes the request. The kernel driver polls waiting for this
  52  * update and then proceeds.
  53  *
  54  * Work Items:
  55  * There are several types of work items that the host may place into a
  56  * workqueue, each with its own requirements and limitations. Currently only
  57  * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
  58  * represents an in-order queue. The kernel driver packs the ring tail pointer
  59  * and an ELSP context descriptor dword into the work item.
  60  * See guc_add_request()
  61  *
  62  */
  63
  64 /* GuC virtual engine: an engine object and a context embedded in one struct */
  65 struct guc_virtual_engine {
  66         struct intel_engine_cs base;    /* engine object of the virtual engine */
  67         struct intel_context context;   /* context stored alongside @base */
  68 };
  69
  70 static struct intel_context *
  71 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); /* forward declaration */
  72
  73 #define GUC_REQUEST_SIZE 64 /* request size, in bytes */
  74
  75 /*
  76  * Below is a set of functions which control the GuC scheduling state which do
  77  * not require a lock as all state transitions are mutually exclusive, i.e. it
  78  * is not possible for the context pinning code and submission, for the same
  79  * context, to be executing simultaneously. We still need an atomic as it is
  80  * possible for some of the bits to change at the same time though.
  81  */
  82 #define SCHED_STATE_NO_LOCK_ENABLED                     BIT(0) /* tested by context_enabled() */
  83 #define SCHED_STATE_NO_LOCK_PENDING_ENABLE              BIT(1) /* tested by context_pending_enable() */
  84 #define SCHED_STATE_NO_LOCK_REGISTERED                  BIT(2) /* tested by context_registered() */
  85 static inline bool context_enabled(struct intel_context *ce)
  86 {
  87         return (atomic_read(&ce->guc_sched_state_no_lock) &
  88                 SCHED_STATE_NO_LOCK_ENABLED);
  89 }
  90
  91 static inline void set_context_enabled(struct intel_context *ce)
  92 {
  93         atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
  94 }
  95
  96 static inline void clr_context_enabled(struct intel_context *ce)
  97 {
  98         atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
  99                    &ce->guc_sched_state_no_lock);
 100 }
 101
 102 static inline bool context_pending_enable(struct intel_context *ce)
 103 {
 104         return (atomic_read(&ce->guc_sched_state_no_lock) &
 105                 SCHED_STATE_NO_LOCK_PENDING_ENABLE);
 106 }
 107
 108 static inline void set_context_pending_enable(struct intel_context *ce)
 109 {
 110         atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
 111                   &ce->guc_sched_state_no_lock);
 112 }
 113
 114 static inline void clr_context_pending_enable(struct intel_context *ce)
 115 {
 116         atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
 117                    &ce->guc_sched_state_no_lock);
 118 }
 119
 120 static inline bool context_registered(struct intel_context *ce)
 121 {
 122         return (atomic_read(&ce->guc_sched_state_no_lock) &
 123                 SCHED_STATE_NO_LOCK_REGISTERED);
 124 }
 125
 126 static inline void set_context_registered(struct intel_context *ce)
 127 {
 128         atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
 129                   &ce->guc_sched_state_no_lock);
 130 }
 131
 132 static inline void clr_context_registered(struct intel_context *ce)
 133 {
 134         atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
 135                    &ce->guc_sched_state_no_lock);
 136 }
 137
 138 /*
 139  * Below is a set of functions which control the GuC scheduling state which
 140  * require a lock, aside from the special case where the functions are called
 141  * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
 142  * path to be executing on the context.
 143  */
 144 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER     BIT(0)
 145 #define SCHED_STATE_DESTROYED                           BIT(1)
 146 #define SCHED_STATE_PENDING_DISABLE                     BIT(2)
 147 #define SCHED_STATE_BANNED                              BIT(3)
 148 #define SCHED_STATE_BLOCKED_SHIFT                       4 /* lowest bit of the blocked counter */
 149 #define SCHED_STATE_BLOCKED             BIT(SCHED_STATE_BLOCKED_SHIFT) /* one count of the blocked counter */
 150 #define SCHED_STATE_BLOCKED_MASK        (0xfff << SCHED_STATE_BLOCKED_SHIFT) /* 12-bit blocked counter field */
 151 static inline void init_sched_state(struct intel_context *ce)
 152 {
 153         /* Only should be called from guc_lrc_desc_pin() */
 154         atomic_set(&ce->guc_sched_state_no_lock, 0);
 155         ce->guc_state.sched_state = 0;
 156 }
 157
 158 static inline bool
 159 context_wait_for_deregister_to_register(struct intel_context *ce)
 160 {
 161         return ce->guc_state.sched_state &
 162                 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 163 }
 164
 165 static inline void
 166 set_context_wait_for_deregister_to_register(struct intel_context *ce)
 167 {
 168         /* Only should be called from guc_lrc_desc_pin() without lock */
 169         ce->guc_state.sched_state |=
 170                 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 171 }
 172
 173 static inline void
 174 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
 175 {
 176         lockdep_assert_held(&ce->guc_state.lock);
 177         ce->guc_state.sched_state &=
 178                 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
 179 }
 180
 181 static inline bool
 182 context_destroyed(struct intel_context *ce)
 183 {
 184         return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
 185 }
 186
 187 static inline void
 188 set_context_destroyed(struct intel_context *ce)
 189 {
 190         lockdep_assert_held(&ce->guc_state.lock);
 191         ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
 192 }
 193
 194 static inline bool context_pending_disable(struct intel_context *ce)
 195 {
 196         return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
 197 }
 198
 199 static inline void set_context_pending_disable(struct intel_context *ce)
 200 {
 201         lockdep_assert_held(&ce->guc_state.lock);
 202         ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
 203 }
 204
 205 static inline void clr_context_pending_disable(struct intel_context *ce)
 206 {
 207         lockdep_assert_held(&ce->guc_state.lock);
 208         ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
 209 }
 210
 211 static inline bool context_banned(struct intel_context *ce)
 212 {
 213         return ce->guc_state.sched_state & SCHED_STATE_BANNED;
 214 }
 215
 216 static inline void set_context_banned(struct intel_context *ce)
 217 {
 218         lockdep_assert_held(&ce->guc_state.lock);
 219         ce->guc_state.sched_state |= SCHED_STATE_BANNED;
 220 }
 221
 222 static inline void clr_context_banned(struct intel_context *ce)
 223 {
 224         lockdep_assert_held(&ce->guc_state.lock);
 225         ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
 226 }
 227
 228 static inline u32 context_blocked(struct intel_context *ce)
 229 {
 230         return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
 231                 SCHED_STATE_BLOCKED_SHIFT;
 232 }
 233
 234 static inline void incr_context_blocked(struct intel_context *ce)
 235 {
 236         lockdep_assert_held(&ce->engine->sched_engine->lock);
 237         lockdep_assert_held(&ce->guc_state.lock);
 238
 239         ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
 240
 241         GEM_BUG_ON(!context_blocked(ce));       /* Overflow check */
 242 }
 243
 244 static inline void decr_context_blocked(struct intel_context *ce)
 245 {
 246         lockdep_assert_held(&ce->engine->sched_engine->lock);
 247         lockdep_assert_held(&ce->guc_state.lock);
 248
 249         GEM_BUG_ON(!context_blocked(ce));       /* Underflow check */
 250
 251         ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
 252 }
 253
 254 static inline bool context_guc_id_invalid(struct intel_context *ce)
 255 {
 256         return ce->guc_id == GUC_INVALID_LRC_ID;
 257 }
 258
 259 static inline void set_context_guc_id_invalid(struct intel_context *ce)
 260 {
 261         ce->guc_id = GUC_INVALID_LRC_ID;
 262 }
 263
 264 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
 265 {
 266         return &ce->engine->gt->uc.guc;
 267 }
 268
 269 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 270 {
 271         return rb_entry(rb, struct i915_priolist, node);
 272 }
 273
 274 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
 275 {
 276         struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
 277
 278         GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
 279
 280         return &base[index];
 281 }
 282
 283 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
 284 {
 285         struct intel_context *ce = xa_load(&guc->context_lookup, id);
 286
 287         GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
 288
 289         return ce;
 290 }
 291
 292 static int guc_lrc_desc_pool_create(struct intel_guc *guc)
 293 {
 294         u32 size;
 295         int ret;
 296
 297         size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) *
 298                           GUC_MAX_LRC_DESCRIPTORS);
 299         ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool,
 300                                              (void **)&guc->lrc_desc_pool_vaddr);
 301         if (ret)
 302                 return ret;
 303
 304         return 0;
 305 }
 306
 307 static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
 308 {
 309         guc->lrc_desc_pool_vaddr = NULL;
 310         i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP);
 311 }
 312
 313 static inline bool guc_submission_initialized(struct intel_guc *guc)
 314 {
 315         return !!guc->lrc_desc_pool_vaddr;
 316 }
 317
 318 static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
 319 {
 320         if (likely(guc_submission_initialized(guc))) {
 321                 struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
 322                 unsigned long flags;
 323
 324                 memset(desc, 0, sizeof(*desc));
 325
 326                 /*
 327                  * xarray API doesn't have xa_erase_irqsave wrapper, so calling
 328                  * the lower level functions directly.
 329                  */
 330                 xa_lock_irqsave(&guc->context_lookup, flags);
 331                 __xa_erase(&guc->context_lookup, id);
 332                 xa_unlock_irqrestore(&guc->context_lookup, flags);
 333         }
 334 }
 335
 336 static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
 337 {
 338         return __get_context(guc, id);
 339 }
 340
 341 static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
 342                                            struct intel_context *ce)
 343 {
 344         unsigned long flags;
 345
 346         /*
 347          * xarray API doesn't have xa_save_irqsave wrapper, so calling the
 348          * lower level functions directly.
 349          */
 350         xa_lock_irqsave(&guc->context_lookup, flags);
 351         __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
 352         xa_unlock_irqrestore(&guc->context_lookup, flags);
 353 }
 354
 355 static int guc_submission_send_busy_loop(struct intel_guc *guc,
 356                                          const u32 *action,
 357                                          u32 len,
 358                                          u32 g2h_len_dw,
 359                                          bool loop)
 360 {
 361         int err;
 362
 363         err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
 364
 365         if (!err && g2h_len_dw)
 366                 atomic_inc(&guc->outstanding_submission_g2h);
 367
 368         return err;
 369 }
 370
 371 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 372                                    atomic_t *wait_var,
 373                                    bool interruptible,
 374                                    long timeout)
 375 {
 376         const int state = interruptible ?
 377                 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 378         DEFINE_WAIT(wait);
 379
 380         might_sleep();
 381         GEM_BUG_ON(timeout < 0);
 382
 383         if (!atomic_read(wait_var))
 384                 return 0;
 385
 386         if (!timeout)
 387                 return -ETIME;
 388
 389         for (;;) {
 390                 prepare_to_wait(&guc->ct.wq, &wait, state);
 391
 392                 if (!atomic_read(wait_var))
 393                         break;
 394
 395                 if (signal_pending_state(state, current)) {
 396                         timeout = -EINTR;
 397                         break;
 398                 }
 399
 400                 if (!timeout) {
 401                         timeout = -ETIME;
 402                         break;
 403                 }
 404
 405                 timeout = io_schedule_timeout(timeout);
 406         }
 407         finish_wait(&guc->ct.wq, &wait);
 408
 409         return (timeout < 0) ? timeout : 0;
 410 }
 411
 412 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
 413 {
 414         if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
 415                 return 0;
 416
 417         return intel_guc_wait_for_pending_msg(guc,
 418                                               &guc->outstanding_submission_g2h,
 419                                               true, timeout);
 420 }
 421
 422 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); /* forward declaration; called by guc_add_request() */
 423
 424 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 425 {
 426         int err = 0;
 427         struct intel_context *ce = rq->context;
 428         u32 action[3];
 429         int len = 0;
 430         u32 g2h_len_dw = 0;
 431         bool enabled;
 432
 433         /*
 434          * Corner case where requests were sitting in the priority list or a
 435          * request resubmitted after the context was banned.
 436          */
 437         if (unlikely(intel_context_is_banned(ce))) {
 438                 i915_request_put(i915_request_mark_eio(rq));
 439                 intel_engine_signal_breadcrumbs(ce->engine);
 440                 goto out;
 441         }
 442
 443         GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
 444         GEM_BUG_ON(context_guc_id_invalid(ce));
 445
 446         /*
 447          * Corner case where the GuC firmware was blown away and reloaded while
 448          * this context was pinned.
 449          */
 450         if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) {
 451                 err = guc_lrc_desc_pin(ce, false);
 452                 if (unlikely(err))
 453                         goto out;
 454         }
 455
 456         /*
 457          * The request / context will be run on the hardware when scheduling
 458          * gets enabled in the unblock.
 459          */
 460         if (unlikely(context_blocked(ce)))
 461                 goto out;
 462
 463         enabled = context_enabled(ce);
 464
 465         if (!enabled) {
 466                 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
 467                 action[len++] = ce->guc_id;
 468                 action[len++] = GUC_CONTEXT_ENABLE;
 469                 set_context_pending_enable(ce);
 470                 intel_context_get(ce);
 471                 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
 472         } else {
 473                 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
 474                 action[len++] = ce->guc_id;
 475         }
 476
 477         err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
 478         if (!enabled && !err) {
 479                 trace_intel_context_sched_enable(ce);
 480                 atomic_inc(&guc->outstanding_submission_g2h);
 481                 set_context_enabled(ce);
 482         } else if (!enabled) {
 483                 clr_context_pending_enable(ce);
 484                 intel_context_put(ce);
 485         }
 486         if (likely(!err))
 487                 trace_i915_request_guc_submit(rq);
 488
 489 out:
 490         return err;
 491 }
 492
 493 static inline void guc_set_lrc_tail(struct i915_request *rq)
 494 {
 495         rq->context->lrc_reg_state[CTX_RING_TAIL] =
 496                 intel_ring_set_tail(rq->ring, rq->tail);
 497 }
 498
 499 static inline int rq_prio(const struct i915_request *rq)
 500 {
 501         return rq->sched.attr.priority;
 502 }
 503
 504 static int guc_dequeue_one_context(struct intel_guc *guc)
 505 {
 506         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 507         struct i915_request *last = NULL;
 508         bool submit = false;
 509         struct rb_node *rb;
 510         int ret;
 511
 512         lockdep_assert_held(&sched_engine->lock);
 513
 514         if (guc->stalled_request) {
 515                 submit = true;
 516                 last = guc->stalled_request;
 517                 goto resubmit;
 518         }
 519
 520         while ((rb = rb_first_cached(&sched_engine->queue))) {
 521                 struct i915_priolist *p = to_priolist(rb);
 522                 struct i915_request *rq, *rn;
 523
 524                 priolist_for_each_request_consume(rq, rn, p) {
 525                         if (last && rq->context != last->context)
 526                                 goto done;
 527
 528                         list_del_init(&rq->sched.link);
 529
 530                         __i915_request_submit(rq);
 531
 532                         trace_i915_request_in(rq, 0);
 533                         last = rq;
 534                         submit = true;
 535                 }
 536
 537                 rb_erase_cached(&p->node, &sched_engine->queue);
 538                 i915_priolist_free(p);
 539         }
 540 done:
 541         if (submit) {
 542                 guc_set_lrc_tail(last);
 543 resubmit:
 544                 ret = guc_add_request(guc, last);
 545                 if (unlikely(ret == -EPIPE))
 546                         goto deadlk;
 547                 else if (ret == -EBUSY) {
 548                         tasklet_schedule(&sched_engine->tasklet);
 549                         guc->stalled_request = last;
 550                         return false;
 551                 }
 552         }
 553
 554         guc->stalled_request = NULL;
 555         return submit;
 556
 557 deadlk:
 558         sched_engine->tasklet.callback = NULL;
 559         tasklet_disable_nosync(&sched_engine->tasklet);
 560         return false;
 561 }
 562
 563 static void guc_submission_tasklet(struct tasklet_struct *t)
 564 {
 565         struct i915_sched_engine *sched_engine =
 566                 from_tasklet(sched_engine, t, tasklet);
 567         unsigned long flags;
 568         bool loop;
 569
 570         spin_lock_irqsave(&sched_engine->lock, flags);
 571
 572         do {
 573                 loop = guc_dequeue_one_context(sched_engine->private_data);
 574         } while (loop);
 575
 576         i915_sched_engine_reset_on_empty(sched_engine);
 577
 578         spin_unlock_irqrestore(&sched_engine->lock, flags);
 579 }
 580
 581 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
 582 {
 583         if (iir & GT_RENDER_USER_INTERRUPT)
 584                 intel_engine_signal_breadcrumbs(engine);
 585 }
 586
 587 static void __guc_context_destroy(struct intel_context *ce); /* forward decl, used by scrub_guc_desc_for_outstanding_g2h() */
 588 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); /* forward decl, used by scrub_guc_desc_for_outstanding_g2h() */
 589 static void guc_signal_context_fence(struct intel_context *ce); /* forward decl, used by scrub_guc_desc_for_outstanding_g2h() */
 590 static void guc_cancel_context_requests(struct intel_context *ce); /* forward decl, used by scrub_guc_desc_for_outstanding_g2h() */
 591 static void guc_blocked_fence_complete(struct intel_context *ce); /* forward decl, used by scrub_guc_desc_for_outstanding_g2h() */
 592
 593 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 594 {
 595         struct intel_context *ce;
 596         unsigned long index, flags;
 597         bool pending_disable, pending_enable, deregister, destroyed, banned;
 598
 599         xa_for_each(&guc->context_lookup, index, ce) {
 600                 /* Flush context */
 601                 spin_lock_irqsave(&ce->guc_state.lock, flags);
 602                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 603
 604                 /*
 605                  * Once we are at this point submission_disabled() is guaranteed
 606                  * to be visible to all callers who set the below flags (see above
 607                  * flush and flushes in reset_prepare). If submission_disabled()
 608                  * is set, the caller shouldn't set these flags.
 609                  */
 610
 611                 destroyed = context_destroyed(ce);
 612                 pending_enable = context_pending_enable(ce);
 613                 pending_disable = context_pending_disable(ce);
 614                 deregister = context_wait_for_deregister_to_register(ce);
 615                 banned = context_banned(ce);
 616                 init_sched_state(ce);
 617
 618                 if (pending_enable || destroyed || deregister) {
 619                         atomic_dec(&guc->outstanding_submission_g2h);
 620                         if (deregister)
 621                                 guc_signal_context_fence(ce);
 622                         if (destroyed) {
 623                                 release_guc_id(guc, ce);
 624                                 __guc_context_destroy(ce);
 625                         }
 626                         if (pending_enable || deregister)
 627                                 intel_context_put(ce);
 628                 }
 629
 630                 /* Not mutualy exclusive with above if statement. */
 631                 if (pending_disable) {
 632                         guc_signal_context_fence(ce);
 633                         if (banned) {
 634                                 guc_cancel_context_requests(ce);
 635                                 intel_engine_signal_breadcrumbs(ce->engine);
 636                         }
 637                         intel_context_sched_disable_unpin(ce);
 638                         atomic_dec(&guc->outstanding_submission_g2h);
 639                         spin_lock_irqsave(&ce->guc_state.lock, flags);
 640                         guc_blocked_fence_complete(ce);
 641                         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 642
 643                         intel_context_put(ce);
 644                 }
 645         }
 646 }
 647
 648 static inline bool
 649 submission_disabled(struct intel_guc *guc)
 650 {
 651         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 652
 653         return unlikely(!sched_engine ||
 654                         !__tasklet_is_enabled(&sched_engine->tasklet));
 655 }
 656
 657 static void disable_submission(struct intel_guc *guc)
 658 {
 659         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 660
 661         if (__tasklet_is_enabled(&sched_engine->tasklet)) {
 662                 GEM_BUG_ON(!guc->ct.enabled);
 663                 __tasklet_disable_sync_once(&sched_engine->tasklet);
 664                 sched_engine->tasklet.callback = NULL;
 665         }
 666 }
 667
 668 static void enable_submission(struct intel_guc *guc)
 669 {
 670         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 671         unsigned long flags;
 672
 673         spin_lock_irqsave(&guc->sched_engine->lock, flags);
 674         sched_engine->tasklet.callback = guc_submission_tasklet;
 675         wmb();  /* Make sure callback visible */
 676         if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
 677             __tasklet_enable(&sched_engine->tasklet)) {
 678                 GEM_BUG_ON(!guc->ct.enabled);
 679
 680                 /* And kick in case we missed a new request submission. */
 681                 tasklet_hi_schedule(&sched_engine->tasklet);
 682         }
 683         spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
 684 }
 685
 686 static void guc_flush_submissions(struct intel_guc *guc)
 687 {
 688         struct i915_sched_engine * const sched_engine = guc->sched_engine;
 689         unsigned long flags;
 690
 691         spin_lock_irqsave(&sched_engine->lock, flags);
 692         spin_unlock_irqrestore(&sched_engine->lock, flags);
 693 }
 694
 695 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
 696 {
 697         int i;
 698
 699         if (unlikely(!guc_submission_initialized(guc))) {
 700                 /* Reset called during driver load? GuC not yet initialised! */
 701                 return;
 702         }
 703
 704         intel_gt_park_heartbeats(guc_to_gt(guc));
 705         disable_submission(guc);
 706         guc->interrupts.disable(guc);
 707
 708         /* Flush IRQ handler */
 709         spin_lock_irq(&guc_to_gt(guc)->irq_lock);
 710         spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
 711
 712         guc_flush_submissions(guc);
 713
 714         /*
 715          * Handle any outstanding G2Hs before reset. Call IRQ handler directly
 716          * each pass as interrupt have been disabled. We always scrub for
 717          * outstanding G2H as it is possible for outstanding_submission_g2h to
 718          * be incremented after the context state update.
 719          */
 720         for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
 721                 intel_guc_to_host_event_handler(guc);
 722 #define wait_for_reset(guc, wait_var) \
 723                 intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
 724                 do {
 725                         wait_for_reset(guc, &guc->outstanding_submission_g2h);
 726                 } while (!list_empty(&guc->ct.requests.incoming));
 727         }
 728         scrub_guc_desc_for_outstanding_g2h(guc);
 729 }
 730
 731 static struct intel_engine_cs *
 732 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
 733 {
 734         struct intel_engine_cs *engine;
 735         intel_engine_mask_t tmp, mask = ve->mask;
 736         unsigned int num_siblings = 0;
 737
 738         for_each_engine_masked(engine, ve->gt, mask, tmp)
 739                 if (num_siblings++ == sibling)
 740                         return engine;
 741
 742         return NULL;
 743 }
 744
 745 static inline struct intel_engine_cs *
 746 __context_to_physical_engine(struct intel_context *ce)
 747 {
 748         struct intel_engine_cs *engine = ce->engine;
 749
 750         if (intel_engine_is_virtual(engine))
 751                 engine = guc_virtual_get_sibling(engine, 0);
 752
 753         return engine;
 754 }
 755
 756 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
 757 {
 758         struct intel_engine_cs *engine = __context_to_physical_engine(ce);
 759
 760         if (intel_context_is_banned(ce))
 761                 return;
 762
 763         GEM_BUG_ON(!intel_context_is_pinned(ce));
 764
 765         /*
 766          * We want a simple context + ring to execute the breadcrumb update.
 767          * We cannot rely on the context being intact across the GPU hang,
 768          * so clear it and rebuild just what we need for the breadcrumb.
 769          * All pending requests for this context will be zapped, and any
 770          * future request will be after userspace has had the opportunity
 771          * to recreate its own state.
 772          */
 773         if (scrub)
 774                 lrc_init_regs(ce, engine, true);
 775
 776         /* Rerun the request; its payload has been neutered (if guilty). */
 777         lrc_update_regs(ce, engine, head);
 778 }
 779
 780 static void guc_reset_nop(struct intel_engine_cs *engine)
 781 {
 782 }
 783
 784 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
 785 {
 786 }
 787
 788 static void
 789 __unwind_incomplete_requests(struct intel_context *ce)
 790 {
 791         struct i915_request *rq, *rn;
 792         struct list_head *pl;
 793         int prio = I915_PRIORITY_INVALID;
 794         struct i915_sched_engine * const sched_engine =
 795                 ce->engine->sched_engine;
 796         unsigned long flags;
 797
 798         spin_lock_irqsave(&sched_engine->lock, flags);
 799         spin_lock(&ce->guc_active.lock);
 800         list_for_each_entry_safe(rq, rn,
 801                                  &ce->guc_active.requests,
 802                                  sched.link) {
 803                 if (i915_request_completed(rq))
 804                         continue;
 805
 806                 list_del_init(&rq->sched.link);
 807                 spin_unlock(&ce->guc_active.lock);
 808
 809                 __i915_request_unsubmit(rq);
 810
 811                 /* Push the request back into the queue for later resubmission. */
 812                 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 813                 if (rq_prio(rq) != prio) {
 814                         prio = rq_prio(rq);
 815                         pl = i915_sched_lookup_priolist(sched_engine, prio);
 816                 }
 817                 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
 818
 819                 list_add_tail(&rq->sched.link, pl);
 820                 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 821
 822                 spin_lock(&ce->guc_active.lock);
 823         }
 824         spin_unlock(&ce->guc_active.lock);
 825         spin_unlock_irqrestore(&sched_engine->lock, flags);
 826 }
 827
 828 static void __guc_reset_context(struct intel_context *ce, bool stalled)
 829 {
 830         struct i915_request *rq;
 831         u32 head;
 832
 833         intel_context_get(ce);
 834
 835         /*
 836          * GuC will implicitly mark the context as non-schedulable
 837          * when it sends the reset notification. Make sure our state
 838          * reflects this change. The context will be marked enabled
 839          * on resubmission.
 840          */
 841         clr_context_enabled(ce);
 842
 843         rq = intel_context_find_active_request(ce);
 844         if (!rq) {
 845                 head = ce->ring->tail;
 846                 stalled = false;
 847                 goto out_replay;
 848         }
 849
 850         if (!i915_request_started(rq))
 851                 stalled = false;
 852
 853         GEM_BUG_ON(i915_active_is_idle(&ce->active));
 854         head = intel_ring_wrap(ce->ring, rq->head);
 855         __i915_request_reset(rq, stalled);
 856
 857 out_replay:
 858         guc_reset_state(ce, head, stalled);
 859         __unwind_incomplete_requests(ce);
 860         intel_context_put(ce);
 861 }
 862
 863 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
 864 {
 865         struct intel_context *ce;
 866         unsigned long index;
 867
 868         if (unlikely(!guc_submission_initialized(guc))) {
 869                 /* Reset called during driver load? GuC not yet initialised! */
 870                 return;
 871         }
 872
 873         xa_for_each(&guc->context_lookup, index, ce)
 874                 if (intel_context_is_pinned(ce))
 875                         __guc_reset_context(ce, stalled);
 876
 877         /* GuC is blown away, drop all references to contexts */
 878         xa_destroy(&guc->context_lookup);
 879 }
 880
 881 static void guc_cancel_context_requests(struct intel_context *ce)
 882 {
 883         struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
 884         struct i915_request *rq;
 885         unsigned long flags;
 886
 887         /* Mark all executing requests as skipped. */
 888         spin_lock_irqsave(&sched_engine->lock, flags);
 889         spin_lock(&ce->guc_active.lock);
 890         list_for_each_entry(rq, &ce->guc_active.requests, sched.link)
 891                 i915_request_put(i915_request_mark_eio(rq));
 892         spin_unlock(&ce->guc_active.lock);
 893         spin_unlock_irqrestore(&sched_engine->lock, flags);
 894 }
 895
 896 static void
 897 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
 898 {
 899         struct i915_request *rq, *rn;
 900         struct rb_node *rb;
 901         unsigned long flags;
 902
 903         /* Can be called during boot if GuC fails to load */
 904         if (!sched_engine)
 905                 return;
 906
 907         /*
 908          * Before we call engine->cancel_requests(), we should have exclusive
 909          * access to the submission state. This is arranged for us by the
 910          * caller disabling the interrupt generation, the tasklet and other
 911          * threads that may then access the same state, giving us a free hand
 912          * to reset state. However, we still need to let lockdep be aware that
 913          * we know this state may be accessed in hardirq context, so we
 914          * disable the irq around this manipulation and we want to keep
 915          * the spinlock focused on its duties and not accidentally conflate
 916          * coverage to the submission's irq state. (Similarly, although we
 917          * shouldn't need to disable irq around the manipulation of the
 918          * submission's irq state, we also wish to remind ourselves that
 919          * it is irq state.)
 920          */
 921         spin_lock_irqsave(&sched_engine->lock, flags);
 922
 923         /* Flush the queued requests to the timeline list (for retiring). */
 924         while ((rb = rb_first_cached(&sched_engine->queue))) {
 925                 struct i915_priolist *p = to_priolist(rb);
 926
 927                 priolist_for_each_request_consume(rq, rn, p) {
 928                         list_del_init(&rq->sched.link);
 929
 930                         __i915_request_submit(rq);
 931
 932                         i915_request_put(i915_request_mark_eio(rq));
 933                 }
 934
 935                 rb_erase_cached(&p->node, &sched_engine->queue);
 936                 i915_priolist_free(p);
 937         }
 938
 939         /* Remaining _unready_ requests will be nop'ed when submitted */
 940
 941         sched_engine->queue_priority_hint = INT_MIN;
 942         sched_engine->queue = RB_ROOT_CACHED;
 943
 944         spin_unlock_irqrestore(&sched_engine->lock, flags);
 945 }
 946
 947 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 948 {
 949         struct intel_context *ce;
 950         unsigned long index;
 951
 952         xa_for_each(&guc->context_lookup, index, ce)
 953                 if (intel_context_is_pinned(ce))
 954                         guc_cancel_context_requests(ce);
 955
 956         guc_cancel_sched_engine_requests(guc->sched_engine);
 957
 958         /* GuC is blown away, drop all references to contexts */
 959         xa_destroy(&guc->context_lookup);
 960 }
 961
 962 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 963 {
 964         /* Reset called during driver load or during wedge? */
 965         if (unlikely(!guc_submission_initialized(guc) ||
 966                      test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
 967                 return;
 968         }
 969
 970         /*
 971          * Technically possible for either of these values to be non-zero here,
 972          * but very unlikely + harmless. Regardless let's add a warn so we can
 973          * see in CI if this happens frequently / a precursor to taking down the
 974          * machine.
 975          */
 976         GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
 977         atomic_set(&guc->outstanding_submission_g2h, 0);
 978
 979         intel_guc_global_policies_update(guc);
 980         enable_submission(guc);
 981         intel_gt_unpark_heartbeats(guc_to_gt(guc));
 982 }
 983
 984 /*
 985  * Set up the memory resources to be shared with the GuC (via the GGTT)
 986  * at firmware loading time.
 987  */
 988 int intel_guc_submission_init(struct intel_guc *guc)
 989 {
 990         int ret;
 991
 992         if (guc->lrc_desc_pool)
 993                 return 0;
 994
 995         ret = guc_lrc_desc_pool_create(guc);
 996         if (ret)
 997                 return ret;
 998         /*
 999          * Keep static analysers happy, let them know that we allocated the
1000          * vma after testing that it didn't exist earlier.
1001          */
1002         GEM_BUG_ON(!guc->lrc_desc_pool);
1003
1004         xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
1005
1006         spin_lock_init(&guc->contexts_lock);
1007         INIT_LIST_HEAD(&guc->guc_id_list);
1008         ida_init(&guc->guc_ids);
1009
1010         return 0;
1011 }
1012
1013 void intel_guc_submission_fini(struct intel_guc *guc)
1014 {
1015         if (!guc->lrc_desc_pool)
1016                 return;
1017
1018         guc_lrc_desc_pool_destroy(guc);
1019         i915_sched_engine_put(guc->sched_engine);
1020 }
1021
1022 static inline void queue_request(struct i915_sched_engine *sched_engine,
1023                                  struct i915_request *rq,
1024                                  int prio)
1025 {
1026         GEM_BUG_ON(!list_empty(&rq->sched.link));
1027         list_add_tail(&rq->sched.link,
1028                       i915_sched_lookup_priolist(sched_engine, prio));
1029         set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1030 }
1031
1032 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1033                                      struct i915_request *rq)
1034 {
1035         int ret;
1036
1037         __i915_request_submit(rq);
1038
1039         trace_i915_request_in(rq, 0);
1040
1041         guc_set_lrc_tail(rq);
1042         ret = guc_add_request(guc, rq);
1043         if (ret == -EBUSY)
1044                 guc->stalled_request = rq;
1045
1046         if (unlikely(ret == -EPIPE))
1047                 disable_submission(guc);
1048
1049         return ret;
1050 }
1051
1052 static void guc_submit_request(struct i915_request *rq)
1053 {
1054         struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1055         struct intel_guc *guc = &rq->engine->gt->uc.guc;
1056         unsigned long flags;
1057
1058         /* Will be called from irq-context when using foreign fences. */
1059         spin_lock_irqsave(&sched_engine->lock, flags);
1060
1061         if (submission_disabled(guc) || guc->stalled_request ||
1062             !i915_sched_engine_is_empty(sched_engine))
1063                 queue_request(sched_engine, rq, rq_prio(rq));
1064         else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1065                 tasklet_hi_schedule(&sched_engine->tasklet);
1066
1067         spin_unlock_irqrestore(&sched_engine->lock, flags);
1068 }
1069
1070 static int new_guc_id(struct intel_guc *guc)
1071 {
1072         return ida_simple_get(&guc->guc_ids, 0,
1073                               GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
1074                               __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
1075 }
1076
1077 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1078 {
1079         if (!context_guc_id_invalid(ce)) {
1080                 ida_simple_remove(&guc->guc_ids, ce->guc_id);
1081                 reset_lrc_desc(guc, ce->guc_id);
1082                 set_context_guc_id_invalid(ce);
1083         }
1084         if (!list_empty(&ce->guc_id_link))
1085                 list_del_init(&ce->guc_id_link);
1086 }
1087
1088 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1089 {
1090         unsigned long flags;
1091
1092         spin_lock_irqsave(&guc->contexts_lock, flags);
1093         __release_guc_id(guc, ce);
1094         spin_unlock_irqrestore(&guc->contexts_lock, flags);
1095 }
1096
1097 static int steal_guc_id(struct intel_guc *guc)
1098 {
1099         struct intel_context *ce;
1100         int guc_id;
1101
1102         lockdep_assert_held(&guc->contexts_lock);
1103
1104         if (!list_empty(&guc->guc_id_list)) {
1105                 ce = list_first_entry(&guc->guc_id_list,
1106                                       struct intel_context,
1107                                       guc_id_link);
1108
1109                 GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
1110                 GEM_BUG_ON(context_guc_id_invalid(ce));
1111
1112                 list_del_init(&ce->guc_id_link);
1113                 guc_id = ce->guc_id;
1114                 clr_context_registered(ce);
1115                 set_context_guc_id_invalid(ce);
1116                 return guc_id;
1117         } else {
1118                 return -EAGAIN;
1119         }
1120 }
1121
1122 static int assign_guc_id(struct intel_guc *guc, u16 *out)
1123 {
1124         int ret;
1125
1126         lockdep_assert_held(&guc->contexts_lock);
1127
1128         ret = new_guc_id(guc);
1129         if (unlikely(ret < 0)) {
1130                 ret = steal_guc_id(guc);
1131                 if (ret < 0)
1132                         return ret;
1133         }
1134
1135         *out = ret;
1136         return 0;
1137 }
1138
1139 #define PIN_GUC_ID_TRIES        4
1140 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1141 {
1142         int ret = 0;
1143         unsigned long flags, tries = PIN_GUC_ID_TRIES;
1144
1145         GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
1146
1147 try_again:
1148         spin_lock_irqsave(&guc->contexts_lock, flags);
1149
1150         if (context_guc_id_invalid(ce)) {
1151                 ret = assign_guc_id(guc, &ce->guc_id);
1152                 if (ret)
1153                         goto out_unlock;
1154                 ret = 1;        /* Indidcates newly assigned guc_id */
1155         }
1156         if (!list_empty(&ce->guc_id_link))
1157                 list_del_init(&ce->guc_id_link);
1158         atomic_inc(&ce->guc_id_ref);
1159
1160 out_unlock:
1161         spin_unlock_irqrestore(&guc->contexts_lock, flags);
1162
1163         /*
1164          * -EAGAIN indicates no guc_ids are available, let's retire any
1165          * outstanding requests to see if that frees up a guc_id. If the first
1166          * retire didn't help, insert a sleep with the timeslice duration before
1167          * attempting to retire more requests. Double the sleep period each
1168          * subsequent pass before finally giving up. The sleep period has max of
1169          * 100ms and minimum of 1ms.
1170          */
1171         if (ret == -EAGAIN && --tries) {
1172                 if (PIN_GUC_ID_TRIES - tries > 1) {
1173                         unsigned int timeslice_shifted =
1174                                 ce->engine->props.timeslice_duration_ms <<
1175                                 (PIN_GUC_ID_TRIES - tries - 2);
1176                         unsigned int max = min_t(unsigned int, 100,
1177                                                  timeslice_shifted);
1178
1179                         msleep(max_t(unsigned int, max, 1));
1180                 }
1181                 intel_gt_retire_requests(guc_to_gt(guc));
1182                 goto try_again;
1183         }
1184
1185         return ret;
1186 }
1187
1188 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1189 {
1190         unsigned long flags;
1191
1192         GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
1193
1194         if (unlikely(context_guc_id_invalid(ce)))
1195                 return;
1196
1197         spin_lock_irqsave(&guc->contexts_lock, flags);
1198         if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
1199             !atomic_read(&ce->guc_id_ref))
1200                 list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
1201         spin_unlock_irqrestore(&guc->contexts_lock, flags);
1202 }
1203
1204 static int __guc_action_register_context(struct intel_guc *guc,
1205                                          u32 guc_id,
1206                                          u32 offset,
1207                                          bool loop)
1208 {
1209         u32 action[] = {
1210                 INTEL_GUC_ACTION_REGISTER_CONTEXT,
1211                 guc_id,
1212                 offset,
1213         };
1214
1215         return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1216                                              0, loop);
1217 }
1218
1219 static int register_context(struct intel_context *ce, bool loop)
1220 {
1221         struct intel_guc *guc = ce_to_guc(ce);
1222         u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
1223                 ce->guc_id * sizeof(struct guc_lrc_desc);
1224         int ret;
1225
1226         trace_intel_context_register(ce);
1227
1228         ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
1229         if (likely(!ret))
1230                 set_context_registered(ce);
1231
1232         return ret;
1233 }
1234
1235 static int __guc_action_deregister_context(struct intel_guc *guc,
1236                                            u32 guc_id,
1237                                            bool loop)
1238 {
1239         u32 action[] = {
1240                 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
1241                 guc_id,
1242         };
1243
1244         return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1245                                              G2H_LEN_DW_DEREGISTER_CONTEXT,
1246                                              loop);
1247 }
1248
1249 static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
1250 {
1251         struct intel_guc *guc = ce_to_guc(ce);
1252
1253         trace_intel_context_deregister(ce);
1254
1255         return __guc_action_deregister_context(guc, guc_id, loop);
1256 }
1257
1258 static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
1259 {
1260         switch (class) {
1261         case RENDER_CLASS:
1262                 return mask >> RCS0;
1263         case VIDEO_ENHANCEMENT_CLASS:
1264                 return mask >> VECS0;
1265         case VIDEO_DECODE_CLASS:
1266                 return mask >> VCS0;
1267         case COPY_ENGINE_CLASS:
1268                 return mask >> BCS0;
1269         default:
1270                 MISSING_CASE(class);
1271                 return 0;
1272         }
1273 }
1274
1275 static void guc_context_policy_init(struct intel_engine_cs *engine,
1276                                     struct guc_lrc_desc *desc)
1277 {
1278         desc->policy_flags = 0;
1279
1280         if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
1281                 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
1282
1283         /* NB: For both of these, zero means disabled. */
1284         desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
1285         desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
1286 }
1287
1288 static inline u8 map_i915_prio_to_guc_prio(int prio);
1289
1290 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
1291 {
1292         struct intel_engine_cs *engine = ce->engine;
1293         struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
1294         struct intel_guc *guc = &engine->gt->uc.guc;
1295         u32 desc_idx = ce->guc_id;
1296         struct guc_lrc_desc *desc;
1297         const struct i915_gem_context *ctx;
1298         int prio = I915_CONTEXT_DEFAULT_PRIORITY;
1299         bool context_registered;
1300         intel_wakeref_t wakeref;
1301         int ret = 0;
1302
1303         GEM_BUG_ON(!engine->mask);
1304
1305         /*
1306          * Ensure LRC + CT vmas are is same region as write barrier is done
1307          * based on CT vma region.
1308          */
1309         GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
1310                    i915_gem_object_is_lmem(ce->ring->vma->obj));
1311
1312         context_registered = lrc_desc_registered(guc, desc_idx);
1313
1314         rcu_read_lock();
1315         ctx = rcu_dereference(ce->gem_context);
1316         if (ctx)
1317                 prio = ctx->sched.priority;
1318         rcu_read_unlock();
1319
1320         reset_lrc_desc(guc, desc_idx);
1321         set_lrc_desc_registered(guc, desc_idx, ce);
1322
1323         desc = __get_lrc_desc(guc, desc_idx);
1324         desc->engine_class = engine_class_to_guc_class(engine->class);
1325         desc->engine_submit_mask = adjust_engine_mask(engine->class,
1326                                                       engine->mask);
1327         desc->hw_context_desc = ce->lrc.lrca;
1328         ce->guc_prio = map_i915_prio_to_guc_prio(prio);
1329         desc->priority = ce->guc_prio;
1330         desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
1331         guc_context_policy_init(engine, desc);
1332         init_sched_state(ce);
1333
1334         /*
1335          * The context_lookup xarray is used to determine if the hardware
1336          * context is currently registered. There are two cases in which it
1337          * could be registered either the guc_id has been stolen from another
1338          * context or the lrc descriptor address of this context has changed. In
1339          * either case the context needs to be deregistered with the GuC before
1340          * registering this context.
1341          */
1342         if (context_registered) {
1343                 trace_intel_context_steal_guc_id(ce);
1344                 if (!loop) {
1345                         set_context_wait_for_deregister_to_register(ce);
1346                         intel_context_get(ce);
1347                 } else {
1348                         bool disabled;
1349                         unsigned long flags;
1350
1351                         /* Seal race with Reset */
1352                         spin_lock_irqsave(&ce->guc_state.lock, flags);
1353                         disabled = submission_disabled(guc);
1354                         if (likely(!disabled)) {
1355                                 set_context_wait_for_deregister_to_register(ce);
1356                                 intel_context_get(ce);
1357                         }
1358                         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1359                         if (unlikely(disabled)) {
1360                                 reset_lrc_desc(guc, desc_idx);
1361                                 return 0;       /* Will get registered later */
1362                         }
1363                 }
1364
1365                 /*
1366                  * If stealing the guc_id, this ce has the same guc_id as the
1367                  * context whose guc_id was stolen.
1368                  */
1369                 with_intel_runtime_pm(runtime_pm, wakeref)
1370                         ret = deregister_context(ce, ce->guc_id, loop);
1371                 if (unlikely(ret == -EBUSY)) {
1372                         clr_context_wait_for_deregister_to_register(ce);
1373                         intel_context_put(ce);
1374                 } else if (unlikely(ret == -ENODEV)) {
1375                         ret = 0;        /* Will get registered later */
1376                 }
1377         } else {
1378                 with_intel_runtime_pm(runtime_pm, wakeref)
1379                         ret = register_context(ce, loop);
1380                 if (unlikely(ret == -EBUSY))
1381                         reset_lrc_desc(guc, desc_idx);
1382                 else if (unlikely(ret == -ENODEV))
1383                         ret = 0;        /* Will get registered later */
1384         }
1385
1386         return ret;
1387 }
1388
/*
 * Pre-pin step for a GuC context on an explicit @engine; defers entirely to
 * lrc_pre_pin() and returns its result.
 */
static int __guc_context_pre_pin(struct intel_context *ce,
                                 struct intel_engine_cs *engine,
                                 struct i915_gem_ww_ctx *ww,
                                 void **vaddr)
{
        int err = lrc_pre_pin(ce, engine, ww, vaddr);

        return err;
}
1396
1397 static int __guc_context_pin(struct intel_context *ce,
1398                              struct intel_engine_cs *engine,
1399                              void *vaddr)
1400 {
1401         if (i915_ggtt_offset(ce->state) !=
1402             (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
1403                 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
1404
1405         /*
1406          * GuC context gets pinned in guc_request_alloc. See that function for
1407          * explaination of why.
1408          */
1409
1410         return lrc_pin(ce, engine, vaddr);
1411 }
1412
1413 static int guc_context_pre_pin(struct intel_context *ce,
1414                                struct i915_gem_ww_ctx *ww,
1415                                void **vaddr)
1416 {
1417         return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
1418 }
1419
1420 static int guc_context_pin(struct intel_context *ce, void *vaddr)
1421 {
1422         return __guc_context_pin(ce, ce->engine, vaddr);
1423 }
1424
/* Unpin @ce: release its hold on the guc_id first, then unpin the LRC. */
static void guc_context_unpin(struct intel_context *ce)
{
        unpin_guc_id(ce_to_guc(ce), ce);
        lrc_unpin(ce);
}
1432
/* Post-unpin step for a GuC context; defers entirely to lrc_post_unpin(). */
static void guc_context_post_unpin(struct intel_context *ce)
{
        lrc_post_unpin(ce);
}
1437
1438 static void __guc_context_sched_enable(struct intel_guc *guc,
1439                                        struct intel_context *ce)
1440 {
1441         u32 action[] = {
1442                 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
1443                 ce->guc_id,
1444                 GUC_CONTEXT_ENABLE
1445         };
1446
1447         trace_intel_context_sched_enable(ce);
1448
1449         guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1450                                       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
1451 }
1452
1453 static void __guc_context_sched_disable(struct intel_guc *guc,
1454                                         struct intel_context *ce,
1455                                         u16 guc_id)
1456 {
1457         u32 action[] = {
1458                 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
1459                 guc_id, /* ce->guc_id not stable */
1460                 GUC_CONTEXT_DISABLE
1461         };
1462
1463         GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
1464
1465         trace_intel_context_sched_disable(ce);
1466
1467         guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1468                                       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
1469 }
1470
1471 static void guc_blocked_fence_complete(struct intel_context *ce)
1472 {
1473         lockdep_assert_held(&ce->guc_state.lock);
1474
1475         if (!i915_sw_fence_done(&ce->guc_blocked))
1476                 i915_sw_fence_complete(&ce->guc_blocked);
1477 }
1478
1479 static void guc_blocked_fence_reinit(struct intel_context *ce)
1480 {
1481         lockdep_assert_held(&ce->guc_state.lock);
1482         GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked));
1483
1484         /*
1485          * This fence is always complete unless a pending schedule disable is
1486          * outstanding. We arm the fence here and complete it when we receive
1487          * the pending schedule disable complete message.
1488          */
1489         i915_sw_fence_fini(&ce->guc_blocked);
1490         i915_sw_fence_reinit(&ce->guc_blocked);
1491         i915_sw_fence_await(&ce->guc_blocked);
1492         i915_sw_fence_commit(&ce->guc_blocked);
1493 }
1494
1495 static u16 prep_context_pending_disable(struct intel_context *ce)
1496 {
1497         lockdep_assert_held(&ce->guc_state.lock);
1498
1499         set_context_pending_disable(ce);
1500         clr_context_enabled(ce);
1501         guc_blocked_fence_reinit(ce);
1502         intel_context_get(ce);
1503
1504         return ce->guc_id;
1505 }
1506
1507 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
1508 {
1509         struct intel_guc *guc = ce_to_guc(ce);
1510         struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
1511         unsigned long flags;
1512         struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
1513         intel_wakeref_t wakeref;
1514         u16 guc_id;
1515         bool enabled;
1516
1517         spin_lock_irqsave(&ce->guc_state.lock, flags);
1518
1519         /*
1520          * Sync with submission path, increment before below changes to context
1521          * state.
1522          */
1523         spin_lock(&sched_engine->lock);
1524         incr_context_blocked(ce);
1525         spin_unlock(&sched_engine->lock);
1526
1527         enabled = context_enabled(ce);
1528         if (unlikely(!enabled || submission_disabled(guc))) {
1529                 if (enabled)
1530                         clr_context_enabled(ce);
1531                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1532                 return &ce->guc_blocked;
1533         }
1534
1535         /*
1536          * We add +2 here as the schedule disable complete CTB handler calls
1537          * intel_context_sched_disable_unpin (-2 to pin_count).
1538          */
1539         atomic_add(2, &ce->pin_count);
1540
1541         guc_id = prep_context_pending_disable(ce);
1542
1543         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1544
1545         with_intel_runtime_pm(runtime_pm, wakeref)
1546                 __guc_context_sched_disable(guc, ce, guc_id);
1547
1548         return &ce->guc_blocked;
1549 }
1550
1551 static void guc_context_unblock(struct intel_context *ce)
1552 {
1553         struct intel_guc *guc = ce_to_guc(ce);
1554         struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
1555         unsigned long flags;
1556         struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
1557         intel_wakeref_t wakeref;
1558         bool enable;
1559
1560         GEM_BUG_ON(context_enabled(ce));
1561
1562         spin_lock_irqsave(&ce->guc_state.lock, flags);
1563
1564         if (unlikely(submission_disabled(guc) ||
1565                      !intel_context_is_pinned(ce) ||
1566                      context_pending_disable(ce) ||
1567                      context_blocked(ce) > 1)) {
1568                 enable = false;
1569         } else {
1570                 enable = true;
1571                 set_context_pending_enable(ce);
1572                 set_context_enabled(ce);
1573                 intel_context_get(ce);
1574         }
1575
1576         /*
1577          * Sync with submission path, decrement after above changes to context
1578          * state.
1579          */
1580         spin_lock(&sched_engine->lock);
1581         decr_context_blocked(ce);
1582         spin_unlock(&sched_engine->lock);
1583
1584         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1585
1586         if (enable) {
1587                 with_intel_runtime_pm(runtime_pm, wakeref)
1588                         __guc_context_sched_enable(guc, ce);
1589         }
1590 }
1591
1592 static void guc_context_cancel_request(struct intel_context *ce,
1593                                        struct i915_request *rq)
1594 {
1595         if (i915_sw_fence_signaled(&rq->submit)) {
1596                 struct i915_sw_fence *fence = guc_context_block(ce);
1597
1598                 i915_sw_fence_wait(fence);
1599                 if (!i915_request_completed(rq)) {
1600                         __i915_request_skip(rq);
1601                         guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
1602                                         true);
1603                 }
1604                 guc_context_unblock(ce);
1605         }
1606 }
1607
1608 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
1609                                                  u16 guc_id,
1610                                                  u32 preemption_timeout)
1611 {
1612         u32 action[] = {
1613                 INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
1614                 guc_id,
1615                 preemption_timeout
1616         };
1617
1618         intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
1619 }
1620
/*
 * guc_context_ban() - mark a context as banned and push it off the hardware.
 * @ce: context to ban
 * @rq: not referenced by this implementation
 *
 * After flushing outstanding submissions the banned flag is set under
 * ce->guc_state.lock, then one of three paths is taken:
 *
 *  1. Submission is disabled, or the context is neither enabled nor waiting
 *     on a schedule disable: cancel its requests and signal breadcrumbs.
 *  2. No schedule disable is pending yet: start one, together with a minimal
 *     preemption timeout.
 *  3. A schedule disable is already pending: only shorten the preemption
 *     timeout.
 */
static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
{
        struct intel_guc *guc = ce_to_guc(ce);
        struct intel_runtime_pm *runtime_pm =
                &ce->engine->gt->i915->runtime_pm;
        intel_wakeref_t wakeref;
        unsigned long flags;

        guc_flush_submissions(guc);

        spin_lock_irqsave(&ce->guc_state.lock, flags);
        set_context_banned(ce);

        if (submission_disabled(guc) ||
            (!context_enabled(ce) && !context_pending_disable(ce))) {
                /* Drop the lock before cancelling the requests. */
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);

                guc_cancel_context_requests(ce);
                intel_engine_signal_breadcrumbs(ce->engine);
        } else if (!context_pending_disable(ce)) {
                u16 guc_id;

                /*
                 * We add +2 here as the schedule disable complete CTB handler
                 * calls intel_context_sched_disable_unpin (-2 to pin_count).
                 */
                atomic_add(2, &ce->pin_count);

                guc_id = prep_context_pending_disable(ce);
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);

                /*
                 * In addition to disabling scheduling, set the preemption
                 * timeout to the minimum value (1 us) so the banned context
                 * gets kicked off the HW ASAP.
                 */
                with_intel_runtime_pm(runtime_pm, wakeref) {
                        __guc_context_set_preemption_timeout(guc, guc_id, 1);
                        __guc_context_sched_disable(guc, ce, guc_id);
                }
        } else {
                /*
                 * A schedule disable is already in flight; only the
                 * preemption timeout is updated, and only if the context
                 * still has a guc_id. Note that ce->guc_state.lock is still
                 * held while this is sent.
                 */
                if (!context_guc_id_invalid(ce))
                        with_intel_runtime_pm(runtime_pm, wakeref)
                                __guc_context_set_preemption_timeout(guc,
                                                                     ce->guc_id,
                                                                     1);
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
        }
}
1670
/*
 * guc_context_sched_disable() - ask the GuC to stop scheduling a context.
 * @ce: context whose scheduling should be disabled
 *
 * When nothing has to be sent (submission disabled, no valid or registered
 * guc_id, or the context is not enabled) the function jumps straight to
 * intel_context_sched_disable_unpin(). Otherwise the context is marked as
 * pending disable under ce->guc_state.lock and the schedule disable is
 * issued with a runtime PM wakeref held.
 */
static void guc_context_sched_disable(struct intel_context *ce)
{
        struct intel_guc *guc = ce_to_guc(ce);
        unsigned long flags;
        struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
        intel_wakeref_t wakeref;
        u16 guc_id;
        bool enabled;

        if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
            !lrc_desc_registered(guc, ce->guc_id)) {
                clr_context_enabled(ce);
                goto unpin;
        }

        /* Lockless early-out; the state is re-checked under the lock below. */
        if (!context_enabled(ce))
                goto unpin;

        spin_lock_irqsave(&ce->guc_state.lock, flags);

        /*
         * We have to check if the context has been disabled by another thread.
         * We also have to check if the context has been pinned again as another
         * pin operation is allowed to pass this function. Checking the pin
         * count, within ce->guc_state.lock, synchronizes this function with
         * guc_request_alloc ensuring a request doesn't slip through the
         * 'context_pending_disable' fence. Checking within the spin lock (can't
         * sleep) ensures another process doesn't pin this context and generate
         * a request before we set the 'context_pending_disable' flag here.
         */
        enabled = context_enabled(ce);
        if (unlikely(!enabled || submission_disabled(guc))) {
                if (enabled)
                        clr_context_enabled(ce);
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
                goto unpin;
        }
        /*
         * atomic_add_unless() performs the -2 (and returns non-zero) only
         * when pin_count is not 2; in that case we return without sending
         * a schedule disable.
         */
        if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) {
                spin_unlock_irqrestore(&ce->guc_state.lock, flags);
                return;
        }
        guc_id = prep_context_pending_disable(ce);

        spin_unlock_irqrestore(&ce->guc_state.lock, flags);

        with_intel_runtime_pm(runtime_pm, wakeref)
                __guc_context_sched_disable(guc, ce, guc_id);

        /* This path does not call intel_context_sched_disable_unpin() here. */
        return;
unpin:
        intel_context_sched_disable_unpin(ce);
}
1723
1724 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
1725 {
1726         struct intel_guc *guc = ce_to_guc(ce);
1727
1728         GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
1729         GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
1730         GEM_BUG_ON(context_enabled(ce));
1731
1732         clr_context_registered(ce);
1733         deregister_context(ce, ce->guc_id, true);
1734 }
1735
1736 static void __guc_context_destroy(struct intel_context *ce)
1737 {
1738         GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
1739                    ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
1740                    ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
1741                    ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
1742
1743         lrc_fini(ce);
1744         intel_context_fini(ce);
1745
1746         if (intel_engine_is_virtual(ce->engine)) {
1747                 struct guc_virtual_engine *ve =
1748                         container_of(ce, typeof(*ve), context);
1749
1750                 if (ve->base.breadcrumbs)
1751                         intel_breadcrumbs_put(ve->base.breadcrumbs);
1752
1753                 kfree(ve);
1754         } else {
1755                 intel_context_free(ce);
1756         }
1757 }
1758
/*
 * guc_context_destroy() - kref release callback for an intel_context.
 * @kref: the reference count embedded in the context (ce->ref)
 *
 * Frees the context immediately when no deregistration with the GuC is
 * needed; otherwise marks it destroyed and issues the deregistration via
 * guc_lrc_desc_unpin() with a runtime PM wakeref held.
 */
static void guc_context_destroy(struct kref *kref)
{
        struct intel_context *ce = container_of(kref, typeof(*ce), ref);
        struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
        struct intel_guc *guc = ce_to_guc(ce);
        intel_wakeref_t wakeref;
        unsigned long flags;
        bool disabled;

        /*
         * If the guc_id is invalid this context has been stolen and we can free
         * it immediately. Also can be freed immediately if the context is not
         * registered with the GuC or the GuC is in the middle of a reset.
         */
        if (context_guc_id_invalid(ce)) {
                __guc_context_destroy(ce);
                return;
        } else if (submission_disabled(guc) ||
                   !lrc_desc_registered(guc, ce->guc_id)) {
                release_guc_id(guc, ce);
                __guc_context_destroy(ce);
                return;
        }

        /*
         * We have to acquire the context spinlock and check guc_id again, if it
         * is valid it hasn't been stolen and needs to be deregistered. We
         * delete this context from the list of unpinned guc_ids available to
         * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
         * returns indicating this context has been deregistered the guc_id is
         * returned to the pool of available guc_ids.
         */
        spin_lock_irqsave(&guc->contexts_lock, flags);
        if (context_guc_id_invalid(ce)) {
                spin_unlock_irqrestore(&guc->contexts_lock, flags);
                __guc_context_destroy(ce);
                return;
        }

        if (!list_empty(&ce->guc_id_link))
                list_del_init(&ce->guc_id_link);
        spin_unlock_irqrestore(&guc->contexts_lock, flags);

        /* Seal race with Reset */
        spin_lock_irqsave(&ce->guc_state.lock, flags);
        disabled = submission_disabled(guc);
        if (likely(!disabled))
                set_context_destroyed(ce);
        spin_unlock_irqrestore(&ce->guc_state.lock, flags);
        if (unlikely(disabled)) {
                /* Submission got disabled meanwhile: free without deregistering. */
                release_guc_id(guc, ce);
                __guc_context_destroy(ce);
                return;
        }

        /*
         * We defer GuC context deregistration until the context is destroyed
         * in order to save on CTBs. With this optimization ideally we only need
         * 1 CTB to register the context during the first pin and 1 CTB to
         * deregister the context when the context is destroyed. Without this
         * optimization, a CTB would be needed every pin & unpin.
         *
         * XXX: Need to acquire the runtime wakeref as this can be triggered
         * from context_free_worker when runtime wakeref is not held.
         * guc_lrc_desc_unpin requires the runtime as a GuC register is written
         * in H2G CTB to deregister the context. A future patch may defer this
         * H2G CTB if the runtime wakeref is zero.
         */
        with_intel_runtime_pm(runtime_pm, wakeref)
                guc_lrc_desc_unpin(ce);
}
1830
1831 static int guc_context_alloc(struct intel_context *ce)
1832 {
1833         return lrc_alloc(ce, ce->engine);
1834 }
1835
1836 static void guc_context_set_prio(struct intel_guc *guc,
1837                                  struct intel_context *ce,
1838                                  u8 prio)
1839 {
1840         u32 action[] = {
1841                 INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
1842                 ce->guc_id,
1843                 prio,
1844         };
1845
1846         GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
1847                    prio > GUC_CLIENT_PRIORITY_NORMAL);
1848
1849         if (ce->guc_prio == prio || submission_disabled(guc) ||
1850             !context_registered(ce))
1851                 return;
1852
1853         guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
1854
1855         ce->guc_prio = prio;
1856         trace_intel_context_set_prio(ce);
1857 }
1858
1859 static inline u8 map_i915_prio_to_guc_prio(int prio)
1860 {
1861         if (prio == I915_PRIORITY_NORMAL)
1862                 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
1863         else if (prio < I915_PRIORITY_NORMAL)
1864                 return GUC_CLIENT_PRIORITY_NORMAL;
1865         else if (prio < I915_PRIORITY_DISPLAY)
1866                 return GUC_CLIENT_PRIORITY_HIGH;
1867         else
1868                 return GUC_CLIENT_PRIORITY_KMD_HIGH;
1869 }
1870
1871 static inline void add_context_inflight_prio(struct intel_context *ce,
1872                                              u8 guc_prio)
1873 {
1874         lockdep_assert_held(&ce->guc_active.lock);
1875         GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
1876
1877         ++ce->guc_prio_count[guc_prio];
1878
1879         /* Overflow protection */
1880         GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
1881 }
1882
1883 static inline void sub_context_inflight_prio(struct intel_context *ce,
1884                                              u8 guc_prio)
1885 {
1886         lockdep_assert_held(&ce->guc_active.lock);
1887         GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
1888
1889         /* Underflow protection */
1890         GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
1891
1892         --ce->guc_prio_count[guc_prio];
1893 }
1894
1895 static inline void update_context_prio(struct intel_context *ce)
1896 {
1897         struct intel_guc *guc = &ce->engine->gt->uc.guc;
1898         int i;
1899
1900         BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
1901         BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
1902
1903         lockdep_assert_held(&ce->guc_active.lock);
1904
1905         for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) {
1906                 if (ce->guc_prio_count[i]) {
1907                         guc_context_set_prio(guc, ce, i);
1908                         break;
1909                 }
1910         }
1911 }
1912
1913 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
1914 {
1915         /* Lower value is higher priority */
1916         return new_guc_prio < old_guc_prio;
1917 }
1918
1919 static void add_to_context(struct i915_request *rq)
1920 {
1921         struct intel_context *ce = rq->context;
1922         u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
1923
1924         GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
1925
1926         spin_lock(&ce->guc_active.lock);
1927         list_move_tail(&rq->sched.link, &ce->guc_active.requests);
1928
1929         if (rq->guc_prio == GUC_PRIO_INIT) {
1930                 rq->guc_prio = new_guc_prio;
1931                 add_context_inflight_prio(ce, rq->guc_prio);
1932         } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
1933                 sub_context_inflight_prio(ce, rq->guc_prio);
1934                 rq->guc_prio = new_guc_prio;
1935                 add_context_inflight_prio(ce, rq->guc_prio);
1936         }
1937         update_context_prio(ce);
1938
1939         spin_unlock(&ce->guc_active.lock);
1940 }
1941
1942 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
1943 {
1944         lockdep_assert_held(&ce->guc_active.lock);
1945
1946         if (rq->guc_prio != GUC_PRIO_INIT &&
1947             rq->guc_prio != GUC_PRIO_FINI) {
1948                 sub_context_inflight_prio(ce, rq->guc_prio);
1949                 update_context_prio(ce);
1950         }
1951         rq->guc_prio = GUC_PRIO_FINI;
1952 }
1953
1954 static void remove_from_context(struct i915_request *rq)
1955 {
1956         struct intel_context *ce = rq->context;
1957
1958         spin_lock_irq(&ce->guc_active.lock);
1959
1960         list_del_init(&rq->sched.link);
1961         clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1962
1963         /* Prevent further __await_execution() registering a cb, then flush */
1964         set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1965
1966         guc_prio_fini(rq, ce);
1967
1968         spin_unlock_irq(&ce->guc_active.lock);
1969
1970         atomic_dec(&ce->guc_id_ref);
1971         i915_request_notify_execute_cb_imm(rq);
1972 }
1973
1974 static const struct intel_context_ops guc_context_ops = {
1975         .alloc = guc_context_alloc,
1976
1977         .pre_pin = guc_context_pre_pin,
1978         .pin = guc_context_pin,
1979         .unpin = guc_context_unpin,
1980         .post_unpin = guc_context_post_unpin,
1981
1982         .ban = guc_context_ban,
1983
1984         .cancel_request = guc_context_cancel_request,
1985
1986         .enter = intel_context_enter_engine,
1987         .exit = intel_context_exit_engine,
1988
1989         .sched_disable = guc_context_sched_disable,
1990
1991         .reset = lrc_reset,
1992         .destroy = guc_context_destroy,
1993
1994         .create_virtual = guc_create_virtual,
1995 };
1996
1997 static void __guc_signal_context_fence(struct intel_context *ce)
1998 {
1999         struct i915_request *rq;
2000
2001         lockdep_assert_held(&ce->guc_state.lock);
2002
2003         if (!list_empty(&ce->guc_state.fences))
2004                 trace_intel_context_fence_release(ce);
2005
2006         list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link)
2007                 i915_sw_fence_complete(&rq->submit);
2008
2009         INIT_LIST_HEAD(&ce->guc_state.fences);
2010 }
2011
2012 static void guc_signal_context_fence(struct intel_context *ce)
2013 {
2014         unsigned long flags;
2015
2016         spin_lock_irqsave(&ce->guc_state.lock, flags);
2017         clr_context_wait_for_deregister_to_register(ce);
2018         __guc_signal_context_fence(ce);
2019         spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2020 }
2021
2022 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
2023 {
2024         return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
2025                 !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) &&
2026                 !submission_disabled(ce_to_guc(ce));
2027 }
2028
/*
 * guc_request_alloc() - backend hook run when a request is being built.
 * @rq: the new request; its context must already be pinned
 *
 * Emits the initial cache/TLB invalidation, takes a guc_id reference for the
 * request (assigning and registering a guc_id when the context holds no
 * reference yet) and, if a schedule disable or deregistration G2H is still
 * outstanding for the context, parks the request on the context fence list.
 *
 * Return: 0 on success, or a negative error code from emit_flush(),
 * pin_guc_id() or guc_lrc_desc_pin(). An -EPIPE from guc_lrc_desc_pin() is
 * not propagated: submission is disabled and 0 is returned.
 */
static int guc_request_alloc(struct i915_request *rq)
{
        struct intel_context *ce = rq->context;
        struct intel_guc *guc = ce_to_guc(ce);
        unsigned long flags;
        int ret;

        GEM_BUG_ON(!intel_context_is_pinned(rq->context));

        /*
         * Flush enough space to reduce the likelihood of waiting after
         * we start building the request - in which case we will just
         * have to repeat work.
         */
        rq->reserved_space += GUC_REQUEST_SIZE;

        /*
         * Note that after this point, we have committed to using
         * this request as it is being used to both track the
         * state of engine initialisation and liveness of the
         * golden renderstate above. Think twice before you try
         * to cancel/unwind this request now.
         */

        /* Unconditionally invalidate GPU caches and TLBs. */
        ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
        if (ret)
                return ret;

        rq->reserved_space -= GUC_REQUEST_SIZE;

        /*
         * Call pin_guc_id here rather than in the pinning step as with
         * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
         * guc_ids and creating horrible race conditions. This is especially bad
         * when guc_ids are being stolen due to over subscription. By the time
         * this function is reached, it is guaranteed that the guc_id will be
         * persistent until the generated request is retired. Thus, sealing these
         * race conditions. It is still safe to fail here if guc_ids are
         * exhausted and return -EAGAIN to the user indicating that they can try
         * again in the future.
         *
         * There is no need for a lock here as the timeline mutex ensures at
         * most one context can be executing this code path at once. The
         * guc_id_ref is incremented once for every request in flight and
         * decremented on each retire. When it is zero, a lock around the
         * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
         */
        /* Fast path: the reference count is already non-zero, just bump it. */
        if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
                goto out;

        ret = pin_guc_id(guc, ce);      /* returns 1 if new guc_id assigned */
        if (unlikely(ret < 0))
                return ret;
        if (context_needs_register(ce, !!ret)) {
                ret = guc_lrc_desc_pin(ce, true);
                if (unlikely(ret)) {    /* unwind */
                        if (ret == -EPIPE) {
                                disable_submission(guc);
                                goto out;       /* GPU will be reset */
                        }
                        atomic_dec(&ce->guc_id_ref);
                        unpin_guc_id(guc, ce);
                        return ret;
                }
        }

        clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

out:
        /*
         * We block all requests on this context if a G2H is pending for a
         * schedule disable or context deregistration as the GuC will fail a
         * schedule enable or context registration if either G2H is pending
         * respectively. Once a G2H returns, the fence is released that is
         * blocking these requests (see guc_signal_context_fence).
         *
         * We can safely check the below fields outside of the lock as it isn't
         * possible for these fields to transition from being clear to set but
         * converse is possible, hence the need for the check within the lock.
         */
        if (likely(!context_wait_for_deregister_to_register(ce) &&
                   !context_pending_disable(ce)))
                return 0;

        spin_lock_irqsave(&ce->guc_state.lock, flags);
        if (context_wait_for_deregister_to_register(ce) ||
            context_pending_disable(ce)) {
                /* Hold back submission until the outstanding G2H arrives. */
                i915_sw_fence_await(&rq->submit);

                list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
        }
        spin_unlock_irqrestore(&ce->guc_state.lock, flags);

        return 0;
}
2125
2126 static int guc_virtual_context_pre_pin(struct intel_context *ce,
2127                                        struct i915_gem_ww_ctx *ww,
2128                                        void **vaddr)
2129 {
2130         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2131
2132         return __guc_context_pre_pin(ce, engine, ww, vaddr);
2133 }
2134
2135 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
2136 {
2137         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2138
2139         return __guc_context_pin(ce, engine, vaddr);
2140 }
2141
2142 static void guc_virtual_context_enter(struct intel_context *ce)
2143 {
2144         intel_engine_mask_t tmp, mask = ce->engine->mask;
2145         struct intel_engine_cs *engine;
2146
2147         for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2148                 intel_engine_pm_get(engine);
2149
2150         intel_timeline_enter(ce->timeline);
2151 }
2152
2153 static void guc_virtual_context_exit(struct intel_context *ce)
2154 {
2155         intel_engine_mask_t tmp, mask = ce->engine->mask;
2156         struct intel_engine_cs *engine;
2157
2158         for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2159                 intel_engine_pm_put(engine);
2160
2161         intel_timeline_exit(ce->timeline);
2162 }
2163
2164 static int guc_virtual_context_alloc(struct intel_context *ce)
2165 {
2166         struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2167
2168         return lrc_alloc(ce, engine);
2169 }
2170
2171 static const struct intel_context_ops virtual_guc_context_ops = {
2172         .alloc = guc_virtual_context_alloc,
2173
2174         .pre_pin = guc_virtual_context_pre_pin,
2175         .pin = guc_virtual_context_pin,
2176         .unpin = guc_context_unpin,
2177         .post_unpin = guc_context_post_unpin,
2178
2179         .ban = guc_context_ban,
2180
2181         .cancel_request = guc_context_cancel_request,
2182
2183         .enter = guc_virtual_context_enter,
2184         .exit = guc_virtual_context_exit,
2185
2186         .sched_disable = guc_context_sched_disable,
2187
2188         .destroy = guc_context_destroy,
2189
2190         .get_sibling = guc_virtual_get_sibling,
2191 };
2192
2193 static bool
2194 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
2195 {
2196         struct intel_engine_cs *sibling;
2197         intel_engine_mask_t tmp, mask = b->engine_mask;
2198         bool result = false;
2199
2200         for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
2201                 result |= intel_engine_irq_enable(sibling);
2202
2203         return result;
2204 }
2205
2206 static void
2207 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
2208 {
2209         struct intel_engine_cs *sibling;
2210         intel_engine_mask_t tmp, mask = b->engine_mask;
2211
2212         for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
2213                 intel_engine_irq_disable(sibling);
2214 }
2215
2216 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
2217 {
2218         int i;
2219
2220         /*
2221          * In GuC submission mode we do not know which physical engine a request
2222          * will be scheduled on, this creates a problem because the breadcrumb
2223          * interrupt is per physical engine. To work around this we attach
2224          * requests and direct all breadcrumb interrupts to the first instance
2225          * of an engine per class. In addition all breadcrumb interrupts are
2226          * enabled / disabled across an engine class in unison.
2227          */
2228         for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
2229                 struct intel_engine_cs *sibling =
2230                         engine->gt->engine_class[engine->class][i];
2231
2232                 if (sibling) {
2233                         if (engine->breadcrumbs != sibling->breadcrumbs) {
2234                                 intel_breadcrumbs_put(engine->breadcrumbs);
2235                                 engine->breadcrumbs =
2236                                         intel_breadcrumbs_get(sibling->breadcrumbs);
2237                         }
2238                         break;
2239                 }
2240         }
2241
2242         if (engine->breadcrumbs) {
2243                 engine->breadcrumbs->engine_mask |= engine->mask;
2244                 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
2245                 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
2246         }
2247 }
2248
2249 static void guc_bump_inflight_request_prio(struct i915_request *rq,
2250                                            int prio)
2251 {
2252         struct intel_context *ce = rq->context;
2253         u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
2254
2255         /* Short circuit function */
2256         if (prio < I915_PRIORITY_NORMAL ||
2257             rq->guc_prio == GUC_PRIO_FINI ||
2258             (rq->guc_prio != GUC_PRIO_INIT &&
2259              !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
2260                 return;
2261
2262         spin_lock(&ce->guc_active.lock);
2263         if (rq->guc_prio != GUC_PRIO_FINI) {
2264                 if (rq->guc_prio != GUC_PRIO_INIT)
2265                         sub_context_inflight_prio(ce, rq->guc_prio);
2266                 rq->guc_prio = new_guc_prio;
2267                 add_context_inflight_prio(ce, rq->guc_prio);
2268                 update_context_prio(ce);
2269         }
2270         spin_unlock(&ce->guc_active.lock);
2271 }
2272
2273 static void guc_retire_inflight_request_prio(struct i915_request *rq)
2274 {
2275         struct intel_context *ce = rq->context;
2276
2277         spin_lock(&ce->guc_active.lock);
2278         guc_prio_fini(rq, ce);
2279         spin_unlock(&ce->guc_active.lock);
2280 }
2281
2282 static void sanitize_hwsp(struct intel_engine_cs *engine)
2283 {
2284         struct intel_timeline *tl;
2285
2286         list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
2287                 intel_timeline_reset_seqno(tl);
2288 }
2289
2290 static void guc_sanitize(struct intel_engine_cs *engine)
2291 {
2292         /*
2293          * Poison residual state on resume, in case the suspend didn't!
2294          *
2295          * We have to assume that across suspend/resume (or other loss
2296          * of control) that the contents of our pinned buffers has been
2297          * lost, replaced by garbage. Since this doesn't always happen,
2298          * let's poison such state so that we more quickly spot when
2299          * we falsely assume it has been preserved.
2300          */
2301         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2302                 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
2303
2304         /*
2305          * The kernel_context HWSP is stored in the status_page. As above,
2306          * that may be lost on resume/initialisation, and so we need to
2307          * reset the value in the HWSP.
2308          */
2309         sanitize_hwsp(engine);
2310
2311         /* And scrub the dirty cachelines for the HWSP */
2312         clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
2313 }
2314
2315 static void setup_hwsp(struct intel_engine_cs *engine)
2316 {
2317         intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
2318
2319         ENGINE_WRITE_FW(engine,
2320                         RING_HWS_PGA,
2321                         i915_ggtt_offset(engine->status_page.vma));
2322 }
2323
2324 static void start_engine(struct intel_engine_cs *engine)
2325 {
2326         ENGINE_WRITE_FW(engine,
2327                         RING_MODE_GEN7,
2328                         _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
2329
2330         ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2331         ENGINE_POSTING_READ(engine, RING_MI_MODE);
2332 }
2333
2334 static int guc_resume(struct intel_engine_cs *engine)
2335 {
2336         assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2337
2338         intel_mocs_init_engine(engine);
2339
2340         intel_breadcrumbs_reset(engine->breadcrumbs);
2341
2342         setup_hwsp(engine);
2343         start_engine(engine);
2344
2345         return 0;
2346 }
2347
2348 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
2349 {
2350         return !sched_engine->tasklet.callback;
2351 }
2352
2353 static void guc_set_default_submission(struct intel_engine_cs *engine)
2354 {
2355         engine->submit_request = guc_submit_request;
2356 }
2357
2358 static inline void guc_kernel_context_pin(struct intel_guc *guc,
2359                                           struct intel_context *ce)
2360 {
2361         if (context_guc_id_invalid(ce))
2362                 pin_guc_id(guc, ce);
2363         guc_lrc_desc_pin(ce, true);
2364 }
2365
2366 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
2367 {
2368         struct intel_gt *gt = guc_to_gt(guc);
2369         struct intel_engine_cs *engine;
2370         enum intel_engine_id id;
2371
2372         /* make sure all descriptors are clean... */
2373         xa_destroy(&guc->context_lookup);
2374
2375         /*
2376          * Some contexts might have been pinned before we enabled GuC
2377          * submission, so we need to add them to the GuC bookeeping.
2378          * Also, after a reset the of the GuC we want to make sure that the
2379          * information shared with GuC is properly reset. The kernel LRCs are
2380          * not attached to the gem_context, so they need to be added separately.
2381          *
2382          * Note: we purposefully do not check the return of guc_lrc_desc_pin,
2383          * because that function can only fail if a reset is just starting. This
2384          * is at the end of reset so presumably another reset isn't happening
2385          * and even it did this code would be run again.
2386          */
2387
2388         for_each_engine(engine, gt, id)
2389                 if (engine->kernel_context)
2390                         guc_kernel_context_pin(guc, engine->kernel_context);
2391 }
2392
2393 static void guc_release(struct intel_engine_cs *engine)
2394 {
2395         engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
2396
2397         intel_engine_cleanup_common(engine);
2398         lrc_fini_wa_ctx(engine);
2399 }
2400
2401 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
2402 {
2403         struct intel_engine_cs *e;
2404         intel_engine_mask_t tmp, mask = engine->mask;
2405
2406         for_each_engine_masked(e, engine->gt, mask, tmp)
2407                 e->serial++;
2408 }
2409
2410 static void guc_default_vfuncs(struct intel_engine_cs *engine)
2411 {
2412         /* Default vfuncs which can be overridden by each engine. */
2413
2414         engine->resume = guc_resume;
2415
2416         engine->cops = &guc_context_ops;
2417         engine->request_alloc = guc_request_alloc;
2418         engine->add_active_request = add_to_context;
2419         engine->remove_active_request = remove_from_context;
2420
2421         engine->sched_engine->schedule = i915_schedule;
2422
2423         engine->reset.prepare = guc_reset_nop;
2424         engine->reset.rewind = guc_rewind_nop;
2425         engine->reset.cancel = guc_reset_nop;
2426         engine->reset.finish = guc_reset_nop;
2427
2428         engine->emit_flush = gen8_emit_flush_xcs;
2429         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
2430         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
2431         if (GRAPHICS_VER(engine->i915) >= 12) {
2432                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
2433                 engine->emit_flush = gen12_emit_flush_xcs;
2434         }
2435         engine->set_default_submission = guc_set_default_submission;
2436
2437         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
2438         engine->flags |= I915_ENGINE_HAS_TIMESLICES;
2439
2440         /*
2441          * TODO: GuC supports timeslicing and semaphores as well, but they're
2442          * handled by the firmware so some minor tweaks are required before
2443          * enabling.
2444          *
2445          * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
2446          */
2447
2448         engine->emit_bb_start = gen8_emit_bb_start;
2449 }
2450
2451 static void rcs_submission_override(struct intel_engine_cs *engine)
2452 {
2453         switch (GRAPHICS_VER(engine->i915)) {
2454         case 12:
2455                 engine->emit_flush = gen12_emit_flush_rcs;
2456                 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
2457                 break;
2458         case 11:
2459                 engine->emit_flush = gen11_emit_flush_rcs;
2460                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
2461                 break;
2462         default:
2463                 engine->emit_flush = gen8_emit_flush_rcs;
2464                 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
2465                 break;
2466         }
2467 }
2468
2469 static inline void guc_default_irqs(struct intel_engine_cs *engine)
2470 {
2471         engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
2472         intel_engine_set_irq_handler(engine, cs_irq_handler);
2473 }
2474
2475 static void guc_sched_engine_destroy(struct kref *kref)
2476 {
2477         struct i915_sched_engine *sched_engine =
2478                 container_of(kref, typeof(*sched_engine), ref);
2479         struct intel_guc *guc = sched_engine->private_data;
2480
2481         guc->sched_engine = NULL;
2482         tasklet_kill(&sched_engine->tasklet); /* flush the callback */
2483         kfree(sched_engine);
2484 }
2485
2486 int intel_guc_submission_setup(struct intel_engine_cs *engine)
2487 {
2488         struct drm_i915_private *i915 = engine->i915;
2489         struct intel_guc *guc = &engine->gt->uc.guc;
2490
2491         /*
2492          * The setup relies on several assumptions (e.g. irqs always enabled)
2493          * that are only valid on gen11+
2494          */
2495         GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
2496
2497         if (!guc->sched_engine) {
2498                 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
2499                 if (!guc->sched_engine)
2500                         return -ENOMEM;
2501
2502                 guc->sched_engine->schedule = i915_schedule;
2503                 guc->sched_engine->disabled = guc_sched_engine_disabled;
2504                 guc->sched_engine->private_data = guc;
2505                 guc->sched_engine->destroy = guc_sched_engine_destroy;
2506                 guc->sched_engine->bump_inflight_request_prio =
2507                         guc_bump_inflight_request_prio;
2508                 guc->sched_engine->retire_inflight_request_prio =
2509                         guc_retire_inflight_request_prio;
2510                 tasklet_setup(&guc->sched_engine->tasklet,
2511                               guc_submission_tasklet);
2512         }
2513         i915_sched_engine_put(engine->sched_engine);
2514         engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
2515
2516         guc_default_vfuncs(engine);
2517         guc_default_irqs(engine);
2518         guc_init_breadcrumbs(engine);
2519
2520         if (engine->class == RENDER_CLASS)
2521                 rcs_submission_override(engine);
2522
2523         lrc_init_wa_ctx(engine);
2524
2525         /* Finally, take ownership and responsibility for cleanup! */
2526         engine->sanitize = guc_sanitize;
2527         engine->release = guc_release;
2528
2529         return 0;
2530 }
2531
/*
 * Enable GuC submission. At present this only initialises the LRC mapping
 * through guc_init_lrc_mapping().
 */
void intel_guc_submission_enable(struct intel_guc *guc)
{
	guc_init_lrc_mapping(guc);
}
2536
/*
 * Disable GuC submission. Intentionally empty: there is nothing to undo
 * here.
 */
void intel_guc_submission_disable(struct intel_guc *guc)
{
	/* Note: By the time we're here, GuC may have already been reset */
}
2541
2542 static bool __guc_submission_supported(struct intel_guc *guc)
2543 {
2544         /* GuC submission is unavailable for pre-Gen11 */
2545         return intel_guc_is_supported(guc) &&
2546                GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
2547 }
2548
2549 static bool __guc_submission_selected(struct intel_guc *guc)
2550 {
2551         struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
2552
2553         if (!intel_guc_submission_is_supported(guc))
2554                 return false;
2555
2556         return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
2557 }
2558
2559 void intel_guc_submission_init_early(struct intel_guc *guc)
2560 {
2561         guc->submission_supported = __guc_submission_supported(guc);
2562         guc->submission_selected = __guc_submission_selected(guc);
2563 }
2564
2565 static inline struct intel_context *
2566 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
2567 {
2568         struct intel_context *ce;
2569
2570         if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
2571                 drm_err(&guc_to_gt(guc)->i915->drm,
2572                         "Invalid desc_idx %u", desc_idx);
2573                 return NULL;
2574         }
2575
2576         ce = __get_context(guc, desc_idx);
2577         if (unlikely(!ce)) {
2578                 drm_err(&guc_to_gt(guc)->i915->drm,
2579                         "Context is NULL, desc_idx %u", desc_idx);
2580                 return NULL;
2581         }
2582
2583         return ce;
2584 }
2585
2586 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
2587 {
2588         if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
2589                 wake_up_all(&guc->ct.wq);
2590 }
2591
2592 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
2593                                           const u32 *msg,
2594                                           u32 len)
2595 {
2596         struct intel_context *ce;
2597         u32 desc_idx = msg[0];
2598
2599         if (unlikely(len < 1)) {
2600                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
2601                 return -EPROTO;
2602         }
2603
2604         ce = g2h_context_lookup(guc, desc_idx);
2605         if (unlikely(!ce))
2606                 return -EPROTO;
2607
2608         trace_intel_context_deregister_done(ce);
2609
2610         if (context_wait_for_deregister_to_register(ce)) {
2611                 struct intel_runtime_pm *runtime_pm =
2612                         &ce->engine->gt->i915->runtime_pm;
2613                 intel_wakeref_t wakeref;
2614
2615                 /*
2616                  * Previous owner of this guc_id has been deregistered, now safe
2617                  * register this context.
2618                  */
2619                 with_intel_runtime_pm(runtime_pm, wakeref)
2620                         register_context(ce, true);
2621                 guc_signal_context_fence(ce);
2622                 intel_context_put(ce);
2623         } else if (context_destroyed(ce)) {
2624                 /* Context has been destroyed */
2625                 release_guc_id(guc, ce);
2626                 __guc_context_destroy(ce);
2627         }
2628
2629         decr_outstanding_submission_g2h(guc);
2630
2631         return 0;
2632 }
2633
2634 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
2635                                      const u32 *msg,
2636                                      u32 len)
2637 {
2638         struct intel_context *ce;
2639         unsigned long flags;
2640         u32 desc_idx = msg[0];
2641
2642         if (unlikely(len < 2)) {
2643                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
2644                 return -EPROTO;
2645         }
2646
2647         ce = g2h_context_lookup(guc, desc_idx);
2648         if (unlikely(!ce))
2649                 return -EPROTO;
2650
2651         if (unlikely(context_destroyed(ce) ||
2652                      (!context_pending_enable(ce) &&
2653                      !context_pending_disable(ce)))) {
2654                 drm_err(&guc_to_gt(guc)->i915->drm,
2655                         "Bad context sched_state 0x%x, 0x%x, desc_idx %u",
2656                         atomic_read(&ce->guc_sched_state_no_lock),
2657                         ce->guc_state.sched_state, desc_idx);
2658                 return -EPROTO;
2659         }
2660
2661         trace_intel_context_sched_done(ce);
2662
2663         if (context_pending_enable(ce)) {
2664                 clr_context_pending_enable(ce);
2665         } else if (context_pending_disable(ce)) {
2666                 bool banned;
2667
2668                 /*
2669                  * Unpin must be done before __guc_signal_context_fence,
2670                  * otherwise a race exists between the requests getting
2671                  * submitted + retired before this unpin completes resulting in
2672                  * the pin_count going to zero and the context still being
2673                  * enabled.
2674                  */
2675                 intel_context_sched_disable_unpin(ce);
2676
2677                 spin_lock_irqsave(&ce->guc_state.lock, flags);
2678                 banned = context_banned(ce);
2679                 clr_context_banned(ce);
2680                 clr_context_pending_disable(ce);
2681                 __guc_signal_context_fence(ce);
2682                 guc_blocked_fence_complete(ce);
2683                 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2684
2685                 if (banned) {
2686                         guc_cancel_context_requests(ce);
2687                         intel_engine_signal_breadcrumbs(ce->engine);
2688                 }
2689         }
2690
2691         decr_outstanding_submission_g2h(guc);
2692         intel_context_put(ce);
2693
2694         return 0;
2695 }
2696
2697 static void capture_error_state(struct intel_guc *guc,
2698                                 struct intel_context *ce)
2699 {
2700         struct intel_gt *gt = guc_to_gt(guc);
2701         struct drm_i915_private *i915 = gt->i915;
2702         struct intel_engine_cs *engine = __context_to_physical_engine(ce);
2703         intel_wakeref_t wakeref;
2704
2705         intel_engine_set_hung_context(engine, ce);
2706         with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2707                 i915_capture_error_state(gt, engine->mask);
2708         atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
2709 }
2710
2711 static void guc_context_replay(struct intel_context *ce)
2712 {
2713         struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
2714
2715         __guc_reset_context(ce, true);
2716         tasklet_hi_schedule(&sched_engine->tasklet);
2717 }
2718
/*
 * React to a context reset notification for @ce: trace it and, unless the
 * context is banned, capture an error state and replay the context.
 */
static void guc_handle_context_reset(struct intel_guc *guc,
				     struct intel_context *ce)
{
	trace_intel_context_reset(ce);

	/* Banned contexts get neither an error capture nor a replay. */
	if (unlikely(intel_context_is_banned(ce)))
		return;

	capture_error_state(guc, ce);
	guc_context_replay(ce);
}
2729
2730 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
2731                                         const u32 *msg, u32 len)
2732 {
2733         struct intel_context *ce;
2734         int desc_idx;
2735
2736         if (unlikely(len != 1)) {
2737                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
2738                 return -EPROTO;
2739         }
2740
2741         desc_idx = msg[0];
2742         ce = g2h_context_lookup(guc, desc_idx);
2743         if (unlikely(!ce))
2744                 return -EPROTO;
2745
2746         guc_handle_context_reset(guc, ce);
2747
2748         return 0;
2749 }
2750
2751 static struct intel_engine_cs *
2752 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
2753 {
2754         struct intel_gt *gt = guc_to_gt(guc);
2755         u8 engine_class = guc_class_to_engine_class(guc_class);
2756
2757         /* Class index is checked in class converter */
2758         GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
2759
2760         return gt->engine_class[engine_class][instance];
2761 }
2762
2763 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
2764                                          const u32 *msg, u32 len)
2765 {
2766         struct intel_engine_cs *engine;
2767         u8 guc_class, instance;
2768         u32 reason;
2769
2770         if (unlikely(len != 3)) {
2771                 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
2772                 return -EPROTO;
2773         }
2774
2775         guc_class = msg[0];
2776         instance = msg[1];
2777         reason = msg[2];
2778
2779         engine = guc_lookup_engine(guc, guc_class, instance);
2780         if (unlikely(!engine)) {
2781                 drm_err(&guc_to_gt(guc)->i915->drm,
2782                         "Invalid engine %d:%d", guc_class, instance);
2783                 return -EPROTO;
2784         }
2785
2786         intel_gt_handle_error(guc_to_gt(guc), engine->mask,
2787                               I915_ERROR_CAPTURE,
2788                               "GuC failed to reset %s (reason=0x%08x)\n",
2789                               engine->name, reason);
2790
2791         return 0;
2792 }
2793
2794 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
2795 {
2796         struct intel_guc *guc = &engine->gt->uc.guc;
2797         struct intel_context *ce;
2798         struct i915_request *rq;
2799         unsigned long index;
2800
2801         /* Reset called during driver load? GuC not yet initialised! */
2802         if (unlikely(!guc_submission_initialized(guc)))
2803                 return;
2804
2805         xa_for_each(&guc->context_lookup, index, ce) {
2806                 if (!intel_context_is_pinned(ce))
2807                         continue;
2808
2809                 if (intel_engine_is_virtual(ce->engine)) {
2810                         if (!(ce->engine->mask & engine->mask))
2811                                 continue;
2812                 } else {
2813                         if (ce->engine != engine)
2814                                 continue;
2815                 }
2816
2817                 list_for_each_entry(rq, &ce->guc_active.requests, sched.link) {
2818                         if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
2819                                 continue;
2820
2821                         intel_engine_set_hung_context(engine, ce);
2822
2823                         /* Can only cope with one hang at a time... */
2824                         return;
2825                 }
2826         }
2827 }
2828
2829 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
2830                                     struct i915_request *hung_rq,
2831                                     struct drm_printer *m)
2832 {
2833         struct intel_guc *guc = &engine->gt->uc.guc;
2834         struct intel_context *ce;
2835         unsigned long index;
2836         unsigned long flags;
2837
2838         /* Reset called during driver load? GuC not yet initialised! */
2839         if (unlikely(!guc_submission_initialized(guc)))
2840                 return;
2841
2842         xa_for_each(&guc->context_lookup, index, ce) {
2843                 if (!intel_context_is_pinned(ce))
2844                         continue;
2845
2846                 if (intel_engine_is_virtual(ce->engine)) {
2847                         if (!(ce->engine->mask & engine->mask))
2848                                 continue;
2849                 } else {
2850                         if (ce->engine != engine)
2851                                 continue;
2852                 }
2853
2854                 spin_lock_irqsave(&ce->guc_active.lock, flags);
2855                 intel_engine_dump_active_requests(&ce->guc_active.requests,
2856                                                   hung_rq, m);
2857                 spin_unlock_irqrestore(&ce->guc_active.lock, flags);
2858         }
2859 }
2860
2861 void intel_guc_submission_print_info(struct intel_guc *guc,
2862                                      struct drm_printer *p)
2863 {
2864         struct i915_sched_engine *sched_engine = guc->sched_engine;
2865         struct rb_node *rb;
2866         unsigned long flags;
2867
2868         if (!sched_engine)
2869                 return;
2870
2871         drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
2872                    atomic_read(&guc->outstanding_submission_g2h));
2873         drm_printf(p, "GuC tasklet count: %u\n\n",
2874                    atomic_read(&sched_engine->tasklet.count));
2875
2876         spin_lock_irqsave(&sched_engine->lock, flags);
2877         drm_printf(p, "Requests in GuC submit tasklet:\n");
2878         for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
2879                 struct i915_priolist *pl = to_priolist(rb);
2880                 struct i915_request *rq;
2881
2882                 priolist_for_each_request(rq, pl)
2883                         drm_printf(p, "guc_id=%u, seqno=%llu\n",
2884                                    rq->context->guc_id,
2885                                    rq->fence.seqno);
2886         }
2887         spin_unlock_irqrestore(&sched_engine->lock, flags);
2888         drm_printf(p, "\n");
2889 }
2890
2891 static inline void guc_log_context_priority(struct drm_printer *p,
2892                                             struct intel_context *ce)
2893 {
2894         int i;
2895
2896         drm_printf(p, "\t\tPriority: %d\n",
2897                    ce->guc_prio);
2898         drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
2899         for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
2900              i < GUC_CLIENT_PRIORITY_NUM; ++i) {
2901                 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
2902                            i, ce->guc_prio_count[i]);
2903         }
2904         drm_printf(p, "\n");
2905 }
2906
2907 void intel_guc_submission_print_context_info(struct intel_guc *guc,
2908                                              struct drm_printer *p)
2909 {
2910         struct intel_context *ce;
2911         unsigned long index;
2912
2913         xa_for_each(&guc->context_lookup, index, ce) {
2914                 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id);
2915                 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
2916                 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
2917                            ce->ring->head,
2918                            ce->lrc_reg_state[CTX_RING_HEAD]);
2919                 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
2920                            ce->ring->tail,
2921                            ce->lrc_reg_state[CTX_RING_TAIL]);
2922                 drm_printf(p, "\t\tContext Pin Count: %u\n",
2923                            atomic_read(&ce->pin_count));
2924                 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
2925                            atomic_read(&ce->guc_id_ref));
2926                 drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
2927                            ce->guc_state.sched_state,
2928                            atomic_read(&ce->guc_sched_state_no_lock));
2929
2930                 guc_log_context_priority(p, ce);
2931         }
2932 }
2933
2934 static struct intel_context *
2935 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
2936 {
2937         struct guc_virtual_engine *ve;
2938         struct intel_guc *guc;
2939         unsigned int n;
2940         int err;
2941
2942         ve = kzalloc(sizeof(*ve), GFP_KERNEL);
2943         if (!ve)
2944                 return ERR_PTR(-ENOMEM);
2945
2946         guc = &siblings[0]->gt->uc.guc;
2947
2948         ve->base.i915 = siblings[0]->i915;
2949         ve->base.gt = siblings[0]->gt;
2950         ve->base.uncore = siblings[0]->uncore;
2951         ve->base.id = -1;
2952
2953         ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
2954         ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
2955         ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
2956         ve->base.saturated = ALL_ENGINES;
2957
2958         snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
2959
2960         ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
2961
2962         ve->base.cops = &virtual_guc_context_ops;
2963         ve->base.request_alloc = guc_request_alloc;
2964         ve->base.bump_serial = virtual_guc_bump_serial;
2965
2966         ve->base.submit_request = guc_submit_request;
2967
2968         ve->base.flags = I915_ENGINE_IS_VIRTUAL;
2969
2970         intel_context_init(&ve->context, &ve->base);
2971
2972         for (n = 0; n < count; n++) {
2973                 struct intel_engine_cs *sibling = siblings[n];
2974
2975                 GEM_BUG_ON(!is_power_of_2(sibling->mask));
2976                 if (sibling->mask & ve->base.mask) {
2977                         DRM_DEBUG("duplicate %s entry in load balancer\n",
2978                                   sibling->name);
2979                         err = -EINVAL;
2980                         goto err_put;
2981                 }
2982
2983                 ve->base.mask |= sibling->mask;
2984
2985                 if (n != 0 && ve->base.class != sibling->class) {
2986                         DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
2987                                   sibling->class, ve->base.class);
2988                         err = -EINVAL;
2989                         goto err_put;
2990                 } else if (n == 0) {
2991                         ve->base.class = sibling->class;
2992                         ve->base.uabi_class = sibling->uabi_class;
2993                         snprintf(ve->base.name, sizeof(ve->base.name),
2994                                  "v%dx%d", ve->base.class, count);
2995                         ve->base.context_size = sibling->context_size;
2996
2997                         ve->base.add_active_request =
2998                                 sibling->add_active_request;
2999                         ve->base.remove_active_request =
3000                                 sibling->remove_active_request;
3001                         ve->base.emit_bb_start = sibling->emit_bb_start;
3002                         ve->base.emit_flush = sibling->emit_flush;
3003                         ve->base.emit_init_breadcrumb =
3004                                 sibling->emit_init_breadcrumb;
3005                         ve->base.emit_fini_breadcrumb =
3006                                 sibling->emit_fini_breadcrumb;
3007                         ve->base.emit_fini_breadcrumb_dw =
3008                                 sibling->emit_fini_breadcrumb_dw;
3009                         ve->base.breadcrumbs =
3010                                 intel_breadcrumbs_get(sibling->breadcrumbs);
3011
3012                         ve->base.flags |= sibling->flags;
3013
3014                         ve->base.props.timeslice_duration_ms =
3015                                 sibling->props.timeslice_duration_ms;
3016                         ve->base.props.preempt_timeout_ms =
3017                                 sibling->props.preempt_timeout_ms;
3018                 }
3019         }
3020
3021         return &ve->context;
3022
3023 err_put:
3024         intel_context_put(&ve->context);
3025         return ERR_PTR(err);
3026 }
3027
3028 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
3029 {
3030         struct intel_engine_cs *engine;
3031         intel_engine_mask_t tmp, mask = ve->mask;
3032
3033         for_each_engine_masked(engine, ve->gt, mask, tmp)
3034                 if (READ_ONCE(engine->props.heartbeat_interval_ms))
3035                         return true;
3036
3037         return false;
3038 }