drivers/gpu/drm/xe/xe_guc_submit.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5
6 #include "xe_guc_submit.h"
7
8 #include <linux/bitfield.h>
9 #include <linux/bitmap.h>
10 #include <linux/circ_buf.h>
11 #include <linux/delay.h>
12 #include <linux/dma-fence-array.h>
13 #include <linux/math64.h>
14
15 #include <drm/drm_managed.h>
16
17 #include "abi/guc_actions_abi.h"
18 #include "abi/guc_klvs_abi.h"
19 #include "regs/xe_lrc_layout.h"
20 #include "xe_assert.h"
21 #include "xe_devcoredump.h"
22 #include "xe_device.h"
23 #include "xe_exec_queue.h"
24 #include "xe_force_wake.h"
25 #include "xe_gpu_scheduler.h"
26 #include "xe_gt.h"
27 #include "xe_gt_clock.h"
28 #include "xe_gt_printk.h"
29 #include "xe_guc.h"
30 #include "xe_guc_capture.h"
31 #include "xe_guc_ct.h"
32 #include "xe_guc_exec_queue_types.h"
33 #include "xe_guc_id_mgr.h"
34 #include "xe_guc_submit_types.h"
35 #include "xe_hw_engine.h"
36 #include "xe_hw_fence.h"
37 #include "xe_lrc.h"
38 #include "xe_macros.h"
39 #include "xe_map.h"
40 #include "xe_mocs.h"
41 #include "xe_pm.h"
42 #include "xe_ring_ops_types.h"
43 #include "xe_sched_job.h"
44 #include "xe_trace.h"
45 #include "xe_vm.h"
46
47 static struct xe_guc *
48 exec_queue_to_guc(struct xe_exec_queue *q)
49 {
50         return &q->gt->uc.guc;
51 }
52
53 /*
54  * Helpers for engine state, using an atomic as some of the bits can transition
55  * at the same time (e.g. a suspend can be happening at the same time as schedule
56  * engine done being processed).
57  */
58 #define EXEC_QUEUE_STATE_REGISTERED             (1 << 0)
59 #define EXEC_QUEUE_STATE_ENABLED                (1 << 1)
60 #define EXEC_QUEUE_STATE_PENDING_ENABLE         (1 << 2)
61 #define EXEC_QUEUE_STATE_PENDING_DISABLE        (1 << 3)
62 #define EXEC_QUEUE_STATE_DESTROYED              (1 << 4)
63 #define EXEC_QUEUE_STATE_SUSPENDED              (1 << 5)
64 #define EXEC_QUEUE_STATE_RESET                  (1 << 6)
65 #define EXEC_QUEUE_STATE_KILLED                 (1 << 7)
66 #define EXEC_QUEUE_STATE_WEDGED                 (1 << 8)
67 #define EXEC_QUEUE_STATE_BANNED                 (1 << 9)
68 #define EXEC_QUEUE_STATE_CHECK_TIMEOUT          (1 << 10)
69 #define EXEC_QUEUE_STATE_EXTRA_REF              (1 << 11)
70
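/*
 * Illustrative sketch (not part of the driver): the state word is a plain
 * bitmask, so several of the transitions above can be in flight at once and
 * are folded in with atomic read-modify-write ops. For example, a queue that
 * is registered and enabled, with a disable request outstanding, reads back
 * as:
 *
 *	u32 state = EXEC_QUEUE_STATE_REGISTERED |	// bit 0
 *		    EXEC_QUEUE_STATE_ENABLED |		// bit 1
 *		    EXEC_QUEUE_STATE_PENDING_DISABLE;	// bit 3
 *
 *	// state == 0xb; the helpers below use atomic_or()/atomic_and() so
 *	// concurrent set/clear of different bits cannot be lost.
 */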
71 static bool exec_queue_registered(struct xe_exec_queue *q)
72 {
73         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
74 }
75
76 static void set_exec_queue_registered(struct xe_exec_queue *q)
77 {
78         atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
79 }
80
81 static void clear_exec_queue_registered(struct xe_exec_queue *q)
82 {
83         atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
84 }
85
86 static bool exec_queue_enabled(struct xe_exec_queue *q)
87 {
88         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
89 }
90
91 static void set_exec_queue_enabled(struct xe_exec_queue *q)
92 {
93         atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
94 }
95
96 static void clear_exec_queue_enabled(struct xe_exec_queue *q)
97 {
98         atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
99 }
100
101 static bool exec_queue_pending_enable(struct xe_exec_queue *q)
102 {
103         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
104 }
105
106 static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
107 {
108         atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
109 }
110
111 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
112 {
113         atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
114 }
115
116 static bool exec_queue_pending_disable(struct xe_exec_queue *q)
117 {
118         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
119 }
120
121 static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
122 {
123         atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
124 }
125
126 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
127 {
128         atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
129 }
130
131 static bool exec_queue_destroyed(struct xe_exec_queue *q)
132 {
133         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
134 }
135
136 static void set_exec_queue_destroyed(struct xe_exec_queue *q)
137 {
138         atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
139 }
140
141 static bool exec_queue_banned(struct xe_exec_queue *q)
142 {
143         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
144 }
145
146 static void set_exec_queue_banned(struct xe_exec_queue *q)
147 {
148         atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
149 }
150
151 static bool exec_queue_suspended(struct xe_exec_queue *q)
152 {
153         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
154 }
155
156 static void set_exec_queue_suspended(struct xe_exec_queue *q)
157 {
158         atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
159 }
160
161 static void clear_exec_queue_suspended(struct xe_exec_queue *q)
162 {
163         atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
164 }
165
166 static bool exec_queue_reset(struct xe_exec_queue *q)
167 {
168         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
169 }
170
171 static void set_exec_queue_reset(struct xe_exec_queue *q)
172 {
173         atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
174 }
175
176 static bool exec_queue_killed(struct xe_exec_queue *q)
177 {
178         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
179 }
180
181 static void set_exec_queue_killed(struct xe_exec_queue *q)
182 {
183         atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
184 }
185
186 static bool exec_queue_wedged(struct xe_exec_queue *q)
187 {
188         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
189 }
190
191 static void set_exec_queue_wedged(struct xe_exec_queue *q)
192 {
193         atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
194 }
195
196 static bool exec_queue_check_timeout(struct xe_exec_queue *q)
197 {
198         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
199 }
200
201 static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
202 {
203         atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
204 }
205
206 static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
207 {
208         atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
209 }
210
211 static bool exec_queue_extra_ref(struct xe_exec_queue *q)
212 {
213         return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
214 }
215
216 static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
217 {
218         atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
219 }
220
221 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
222 {
223         return (atomic_read(&q->guc->state) &
224                 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
225                  EXEC_QUEUE_STATE_BANNED));
226 }
227
228 static void guc_submit_fini(struct drm_device *drm, void *arg)
229 {
230         struct xe_guc *guc = arg;
231
232         xa_destroy(&guc->submission_state.exec_queue_lookup);
233 }
234
235 static void guc_submit_wedged_fini(void *arg)
236 {
237         struct xe_guc *guc = arg;
238         struct xe_exec_queue *q;
239         unsigned long index;
240
241         mutex_lock(&guc->submission_state.lock);
242         xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
243                 if (exec_queue_wedged(q)) {
244                         mutex_unlock(&guc->submission_state.lock);
245                         xe_exec_queue_put(q);
246                         mutex_lock(&guc->submission_state.lock);
247                 }
248         }
249         mutex_unlock(&guc->submission_state.lock);
250 }
251
252 static const struct xe_exec_queue_ops guc_exec_queue_ops;
253
254 static void primelockdep(struct xe_guc *guc)
255 {
256         if (!IS_ENABLED(CONFIG_LOCKDEP))
257                 return;
258
259         fs_reclaim_acquire(GFP_KERNEL);
260
261         mutex_lock(&guc->submission_state.lock);
262         mutex_unlock(&guc->submission_state.lock);
263
264         fs_reclaim_release(GFP_KERNEL);
265 }
266
267 /**
268  * xe_guc_submit_init() - Initialize GuC submission.
269  * @guc: the &xe_guc to initialize
270  * @num_ids: number of GuC context IDs to use
271  *
272  * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
273  * GuC context IDs supported by the GuC firmware should be used for submission.
274  *
275  * Only VF drivers have to provide an explicit number of GuC context IDs
276  * that they can use for submission.
277  *
278  * Return: 0 on success or a negative error code on failure.
279  */
280 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
281 {
282         struct xe_device *xe = guc_to_xe(guc);
283         struct xe_gt *gt = guc_to_gt(guc);
284         int err;
285
286         err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
287         if (err)
288                 return err;
289
290         err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
291         if (err)
292                 return err;
293
294         gt->exec_queue_ops = &guc_exec_queue_ops;
295
296         xa_init(&guc->submission_state.exec_queue_lookup);
297
298         init_waitqueue_head(&guc->submission_state.fini_wq);
299
300         primelockdep(guc);
301
302         return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
303 }
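/*
 * Illustrative sketch (hypothetical caller, not from this file): a bare-metal
 * or PF driver lets the firmware limit apply by passing ~0, while a VF passes
 * however many context IDs it was provisioned with (num_vf_ctx_ids is a
 * made-up name here):
 *
 *	unsigned int num_ids = IS_SRIOV_VF(xe) ? num_vf_ctx_ids : ~0;
 *	int err = xe_guc_submit_init(guc, num_ids);
 *
 *	if (err)
 *		return err;
 */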
304
305 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
306 {
307         int i;
308
309         lockdep_assert_held(&guc->submission_state.lock);
310
311         for (i = 0; i < xa_count; ++i)
312                 xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
313
314         xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
315                                      q->guc->id, q->width);
316
317         if (xa_empty(&guc->submission_state.exec_queue_lookup))
318                 wake_up(&guc->submission_state.fini_wq);
319 }
320
321 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
322 {
323         int ret;
324         int i;
325
326         /*
327          * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
328          * worst case the user gets -ENOMEM on engine create and has to try again.
329          *
330          * FIXME: Have the caller pre-alloc or post-alloc with GFP_KERNEL to prevent
331          * failure.
332          */
333         lockdep_assert_held(&guc->submission_state.lock);
334
335         ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
336                                            q->width);
337         if (ret < 0)
338                 return ret;
339
340         q->guc->id = ret;
341
342         for (i = 0; i < q->width; ++i) {
343                 ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
344                                       q->guc->id + i, q, GFP_NOWAIT));
345                 if (ret)
346                         goto err_release;
347         }
348
349         return 0;
350
351 err_release:
352         __release_guc_id(guc, q, i);
353
354         return ret;
355 }
356
357 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
358 {
359         mutex_lock(&guc->submission_state.lock);
360         __release_guc_id(guc, q, q->width);
361         mutex_unlock(&guc->submission_state.lock);
362 }
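/*
 * Illustrative example (not from the driver): a parallel queue of width 3
 * that is handed base ID 40 by the ID manager gets one lookup entry per
 * instance, all pointing at the same queue:
 *
 *	exec_queue_lookup[40] = q;
 *	exec_queue_lookup[41] = q;
 *	exec_queue_lookup[42] = q;
 *
 * On a partial xa_store() failure, __release_guc_id(guc, q, i) erases only
 * the i entries that were actually stored; release_guc_id() always erases
 * the full q->width once the queue is torn down.
 */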
363
364 struct exec_queue_policy {
365         u32 count;
366         struct guc_update_exec_queue_policy h2g;
367 };
368
369 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
370 {
371         size_t bytes = sizeof(policy->h2g.header) +
372                        (sizeof(policy->h2g.klv[0]) * policy->count);
373
374         return bytes / sizeof(u32);
375 }
376
377 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
378                                               u16 guc_id)
379 {
380         policy->h2g.header.action =
381                 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
382         policy->h2g.header.guc_id = guc_id;
383         policy->count = 0;
384 }
385
386 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
387 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
388                                            u32 data) \
389 { \
390         XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
391 \
392         policy->h2g.klv[policy->count].kl = \
393                 FIELD_PREP(GUC_KLV_0_KEY, \
394                            GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
395                 FIELD_PREP(GUC_KLV_0_LEN, 1); \
396         policy->h2g.klv[policy->count].value = data; \
397         policy->count++; \
398 }
399
400 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
401 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
402 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
403 #undef MAKE_EXEC_QUEUE_POLICY_ADD
404
405 static const int xe_exec_queue_prio_to_guc[] = {
406         [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
407         [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
408         [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
409         [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
410 };
411
412 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
413 {
414         struct exec_queue_policy policy;
415         struct xe_device *xe = guc_to_xe(guc);
416         enum xe_exec_queue_priority prio = q->sched_props.priority;
417         u32 timeslice_us = q->sched_props.timeslice_us;
418         u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
419
420         xe_assert(xe, exec_queue_registered(q));
421
422         __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
423         __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
424         __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
425         __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
426
427         xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
428                        __guc_exec_queue_policy_action_size(&policy), 0, 0);
429 }
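/*
 * Illustrative layout (not from the driver): assuming a two-dword H2G header
 * (action + guc_id) and two dwords per KLV (key/length word followed by the
 * value), the message built by init_policies() above is, schematically:
 *
 *	dw0: XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES
 *	dw1: guc_id
 *	dw2: key = SCHEDULING_PRIORITY,  len = 1
 *	dw3: value = xe_exec_queue_prio_to_guc[prio]
 *	dw4: key = EXECUTION_QUANTUM,    len = 1
 *	dw5: value = timeslice_us
 *	dw6: key = PREEMPTION_TIMEOUT,   len = 1
 *	dw7: value = preempt_timeout_us
 *
 * which is why __guc_exec_queue_policy_action_size() reports the size in
 * dwords rather than bytes: under these assumptions the send is 8 dwords.
 */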
430
431 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
432 {
433         struct exec_queue_policy policy;
434
435         __guc_exec_queue_policy_start_klv(&policy, q->guc->id);
436         __guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
437
438         xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
439                        __guc_exec_queue_policy_action_size(&policy), 0, 0);
440 }
441
442 #define parallel_read(xe_, map_, field_) \
443         xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
444                         field_)
445 #define parallel_write(xe_, map_, field_, val_) \
446         xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
447                         field_, val_)
448
449 static void __register_mlrc_exec_queue(struct xe_guc *guc,
450                                        struct xe_exec_queue *q,
451                                        struct guc_ctxt_registration_info *info)
452 {
453 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
454         struct xe_device *xe = guc_to_xe(guc);
455         u32 action[MAX_MLRC_REG_SIZE];
456         int len = 0;
457         int i;
458
459         xe_assert(xe, xe_exec_queue_is_parallel(q));
460
461         action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
462         action[len++] = info->flags;
463         action[len++] = info->context_idx;
464         action[len++] = info->engine_class;
465         action[len++] = info->engine_submit_mask;
466         action[len++] = info->wq_desc_lo;
467         action[len++] = info->wq_desc_hi;
468         action[len++] = info->wq_base_lo;
469         action[len++] = info->wq_base_hi;
470         action[len++] = info->wq_size;
471         action[len++] = q->width;
472         action[len++] = info->hwlrca_lo;
473         action[len++] = info->hwlrca_hi;
474
475         for (i = 1; i < q->width; ++i) {
476                 struct xe_lrc *lrc = q->lrc[i];
477
478                 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
479                 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
480         }
481
482         xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
483 #undef MAX_MLRC_REG_SIZE
484
485         xe_guc_ct_send(&guc->ct, action, len, 0, 0);
486 }
487
488 static void __register_exec_queue(struct xe_guc *guc,
489                                   struct guc_ctxt_registration_info *info)
490 {
491         u32 action[] = {
492                 XE_GUC_ACTION_REGISTER_CONTEXT,
493                 info->flags,
494                 info->context_idx,
495                 info->engine_class,
496                 info->engine_submit_mask,
497                 info->wq_desc_lo,
498                 info->wq_desc_hi,
499                 info->wq_base_lo,
500                 info->wq_base_hi,
501                 info->wq_size,
502                 info->hwlrca_lo,
503                 info->hwlrca_hi,
504         };
505
506         xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
507 }
508
509 static void register_exec_queue(struct xe_exec_queue *q)
510 {
511         struct xe_guc *guc = exec_queue_to_guc(q);
512         struct xe_device *xe = guc_to_xe(guc);
513         struct xe_lrc *lrc = q->lrc[0];
514         struct guc_ctxt_registration_info info;
515
516         xe_assert(xe, !exec_queue_registered(q));
517
518         memset(&info, 0, sizeof(info));
519         info.context_idx = q->guc->id;
520         info.engine_class = xe_engine_class_to_guc_class(q->class);
521         info.engine_submit_mask = q->logical_mask;
522         info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
523         info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
524         info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
525
526         if (xe_exec_queue_is_parallel(q)) {
527                 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
528                 struct iosys_map map = xe_lrc_parallel_map(lrc);
529
530                 info.wq_desc_lo = lower_32_bits(ggtt_addr +
531                         offsetof(struct guc_submit_parallel_scratch, wq_desc));
532                 info.wq_desc_hi = upper_32_bits(ggtt_addr +
533                         offsetof(struct guc_submit_parallel_scratch, wq_desc));
534                 info.wq_base_lo = lower_32_bits(ggtt_addr +
535                         offsetof(struct guc_submit_parallel_scratch, wq[0]));
536                 info.wq_base_hi = upper_32_bits(ggtt_addr +
537                         offsetof(struct guc_submit_parallel_scratch, wq[0]));
538                 info.wq_size = WQ_SIZE;
539
540                 q->guc->wqi_head = 0;
541                 q->guc->wqi_tail = 0;
542                 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
543                 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
544         }
545
546         /*
547          * We must keep a reference for LR engines if the engine is registered with
548          * the GuC, as their jobs signal immediately and we can't destroy an engine
549          * while the GuC has a reference to it.
550          */
551         if (xe_exec_queue_is_lr(q))
552                 xe_exec_queue_get(q);
553
554         set_exec_queue_registered(q);
555         trace_xe_exec_queue_register(q);
556         if (xe_exec_queue_is_parallel(q))
557                 __register_mlrc_exec_queue(guc, q, &info);
558         else
559                 __register_exec_queue(guc, &info);
560         init_policies(guc, q);
561 }
562
563 static u32 wq_space_until_wrap(struct xe_exec_queue *q)
564 {
565         return (WQ_SIZE - q->guc->wqi_tail);
566 }
567
568 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
569 {
570         struct xe_guc *guc = exec_queue_to_guc(q);
571         struct xe_device *xe = guc_to_xe(guc);
572         struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
573         unsigned int sleep_period_ms = 1;
574
575 #define AVAILABLE_SPACE \
576         CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
577         if (wqi_size > AVAILABLE_SPACE) {
578 try_again:
579                 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
580                 if (wqi_size > AVAILABLE_SPACE) {
581                         if (sleep_period_ms == 1024) {
582                                 xe_gt_reset_async(q->gt);
583                                 return -ENODEV;
584                         }
585
586                         msleep(sleep_period_ms);
587                         sleep_period_ms <<= 1;
588                         goto try_again;
589                 }
590         }
591 #undef AVAILABLE_SPACE
592
593         return 0;
594 }
595
596 static int wq_noop_append(struct xe_exec_queue *q)
597 {
598         struct xe_guc *guc = exec_queue_to_guc(q);
599         struct xe_device *xe = guc_to_xe(guc);
600         struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
601         u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
602
603         if (wq_wait_for_space(q, wq_space_until_wrap(q)))
604                 return -ENODEV;
605
606         xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
607
608         parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
609                        FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
610                        FIELD_PREP(WQ_LEN_MASK, len_dw));
611         q->guc->wqi_tail = 0;
612
613         return 0;
614 }
615
616 static void wq_item_append(struct xe_exec_queue *q)
617 {
618         struct xe_guc *guc = exec_queue_to_guc(q);
619         struct xe_device *xe = guc_to_xe(guc);
620         struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
621 #define WQ_HEADER_SIZE  4       /* Includes 1 LRC address too */
622         u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
623         u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
624         u32 len_dw = (wqi_size / sizeof(u32)) - 1;
625         int i = 0, j;
626
627         if (wqi_size > wq_space_until_wrap(q)) {
628                 if (wq_noop_append(q))
629                         return;
630         }
631         if (wq_wait_for_space(q, wqi_size))
632                 return;
633
634         wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
635                 FIELD_PREP(WQ_LEN_MASK, len_dw);
636         wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
637         wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
638                 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
639         wqi[i++] = 0;
640         for (j = 1; j < q->width; ++j) {
641                 struct xe_lrc *lrc = q->lrc[j];
642
643                 wqi[i++] = lrc->ring.tail / sizeof(u64);
644         }
645
646         xe_assert(xe, i == wqi_size / sizeof(u32));
647
648         iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
649                                       wq[q->guc->wqi_tail / sizeof(u32)]));
650         xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
651         q->guc->wqi_tail += wqi_size;
652         xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
653
654         xe_device_wmb(xe);
655
656         map = xe_lrc_parallel_map(q->lrc[0]);
657         parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
658 }
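/*
 * Illustrative sketch (not part of the driver): the work queue is a
 * power-of-two ring in the parallel scratch page. Free space is computed
 * CIRC_SPACE-style from the software tail (producer) and the head the GuC
 * publishes in wq_desc.head, and an item that would straddle the end of the
 * ring is preceded by a NOOP padding it out to the wrap point. RING_SIZE
 * below is a stand-in for WQ_SIZE:
 *
 *	#define RING_SIZE	0x1000
 *
 *	static unsigned int ring_space(unsigned int tail, unsigned int head)
 *	{
 *		// same math as CIRC_SPACE(tail, head, RING_SIZE)
 *		return (head - tail - 1) & (RING_SIZE - 1);
 *	}
 *
 *	static bool item_needs_wrap_noop(unsigned int tail, unsigned int bytes)
 *	{
 *		// mirrors the wq_space_until_wrap() check in wq_item_append()
 *		return bytes > RING_SIZE - tail;
 *	}
 */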
659
660 #define RESUME_PENDING  ~0x0ull
661 static void submit_exec_queue(struct xe_exec_queue *q)
662 {
663         struct xe_guc *guc = exec_queue_to_guc(q);
664         struct xe_device *xe = guc_to_xe(guc);
665         struct xe_lrc *lrc = q->lrc[0];
666         u32 action[3];
667         u32 g2h_len = 0;
668         u32 num_g2h = 0;
669         int len = 0;
670         bool extra_submit = false;
671
672         xe_assert(xe, exec_queue_registered(q));
673
674         if (xe_exec_queue_is_parallel(q))
675                 wq_item_append(q);
676         else
677                 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
678
679         if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
680                 return;
681
682         if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
683                 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
684                 action[len++] = q->guc->id;
685                 action[len++] = GUC_CONTEXT_ENABLE;
686                 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
687                 num_g2h = 1;
688                 if (xe_exec_queue_is_parallel(q))
689                         extra_submit = true;
690
691                 q->guc->resume_time = RESUME_PENDING;
692                 set_exec_queue_pending_enable(q);
693                 set_exec_queue_enabled(q);
694                 trace_xe_exec_queue_scheduling_enable(q);
695         } else {
696                 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
697                 action[len++] = q->guc->id;
698                 trace_xe_exec_queue_submit(q);
699         }
700
701         xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
702
703         if (extra_submit) {
704                 len = 0;
705                 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
706                 action[len++] = q->guc->id;
707                 trace_xe_exec_queue_submit(q);
708
709                 xe_guc_ct_send(&guc->ct, action, len, 0, 0);
710         }
711 }
712
713 static struct dma_fence *
714 guc_exec_queue_run_job(struct drm_sched_job *drm_job)
715 {
716         struct xe_sched_job *job = to_xe_sched_job(drm_job);
717         struct xe_exec_queue *q = job->q;
718         struct xe_guc *guc = exec_queue_to_guc(q);
719         struct xe_device *xe = guc_to_xe(guc);
720         struct dma_fence *fence = NULL;
721         bool lr = xe_exec_queue_is_lr(q);
722
723         xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
724                   exec_queue_banned(q) || exec_queue_suspended(q));
725
726         trace_xe_sched_job_run(job);
727
728         if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
729                 if (!exec_queue_registered(q))
730                         register_exec_queue(q);
731                 if (!lr)        /* LR jobs are emitted in the exec IOCTL */
732                         q->ring_ops->emit_job(job);
733                 submit_exec_queue(q);
734         }
735
736         if (lr) {
737                 xe_sched_job_set_error(job, -EOPNOTSUPP);
738                 dma_fence_put(job->fence);      /* Drop ref from xe_sched_job_arm */
739         } else {
740                 fence = job->fence;
741         }
742
743         return fence;
744 }
745
746 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
747 {
748         struct xe_sched_job *job = to_xe_sched_job(drm_job);
749
750         trace_xe_sched_job_free(job);
751         xe_sched_job_put(job);
752 }
753
754 int xe_guc_read_stopped(struct xe_guc *guc)
755 {
756         return atomic_read(&guc->submission_state.stopped);
757 }
758
759 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)                    \
760         u32 action[] = {                                                \
761                 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,                   \
762                 q->guc->id,                                             \
763                 GUC_CONTEXT_##enable_disable,                           \
764         }
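/*
 * Illustrative expansion (equivalent to what the macro above produces):
 * MAKE_SCHED_CONTEXT_ACTION(q, ENABLE) declares a local three-dword H2G
 * action on the caller's stack:
 *
 *	u32 action[] = {
 *		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		q->guc->id,
 *		GUC_CONTEXT_ENABLE,
 *	};
 */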
765
766 static void disable_scheduling_deregister(struct xe_guc *guc,
767                                           struct xe_exec_queue *q)
768 {
769         MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
770         int ret;
771
772         set_min_preemption_timeout(guc, q);
773         smp_rmb();
774         ret = wait_event_timeout(guc->ct.wq,
775                                  (!exec_queue_pending_enable(q) &&
776                                   !exec_queue_pending_disable(q)) ||
777                                          xe_guc_read_stopped(guc),
778                                  HZ * 5);
779         if (!ret) {
780                 struct xe_gpu_scheduler *sched = &q->guc->sched;
781
782                 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
783                 xe_sched_submission_start(sched);
784                 xe_gt_reset_async(q->gt);
785                 xe_sched_tdr_queue_imm(sched);
786                 return;
787         }
788
789         clear_exec_queue_enabled(q);
790         set_exec_queue_pending_disable(q);
791         set_exec_queue_destroyed(q);
792         trace_xe_exec_queue_scheduling_disable(q);
793
794         /*
795          * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
796          * handler and we are not allowed to reserve G2H space in handlers.
797          */
798         xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
799                        G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
800                        G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
801 }
802
803 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
804 {
805         struct xe_guc *guc = exec_queue_to_guc(q);
806         struct xe_device *xe = guc_to_xe(guc);
807
808         /* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
809         wake_up_all(&xe->ufence_wq);
810
811         if (xe_exec_queue_is_lr(q))
812                 queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
813         else
814                 xe_sched_tdr_queue_imm(&q->guc->sched);
815 }
816
817 /**
818  * xe_guc_submit_wedge() - Wedge GuC submission
819  * @guc: the GuC object
820  *
821  * Save the state of exec queues registered with the GuC by taking a ref to
822  * each queue. Register a cleanup action to drop those refs on driver unload.
823  */
824 void xe_guc_submit_wedge(struct xe_guc *guc)
825 {
826         struct xe_device *xe = guc_to_xe(guc);
827         struct xe_exec_queue *q;
828         unsigned long index;
829         int err;
830
831         xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
832
833         err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
834                                        guc_submit_wedged_fini, guc);
835         if (err) {
836                 drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2; device is wedged regardless.\n");
837                 return;
838         }
839
840         mutex_lock(&guc->submission_state.lock);
841         xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
842                 if (xe_exec_queue_get_unless_zero(q))
843                         set_exec_queue_wedged(q);
844         mutex_unlock(&guc->submission_state.lock);
845 }
846
847 static bool guc_submit_hint_wedged(struct xe_guc *guc)
848 {
849         struct xe_device *xe = guc_to_xe(guc);
850
851         if (xe->wedged.mode != 2)
852                 return false;
853
854         if (xe_device_wedged(xe))
855                 return true;
856
857         xe_device_declare_wedged(xe);
858
859         return true;
860 }
861
862 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
863 {
864         struct xe_guc_exec_queue *ge =
865                 container_of(w, struct xe_guc_exec_queue, lr_tdr);
866         struct xe_exec_queue *q = ge->q;
867         struct xe_guc *guc = exec_queue_to_guc(q);
868         struct xe_device *xe = guc_to_xe(guc);
869         struct xe_gpu_scheduler *sched = &ge->sched;
870         bool wedged;
871
872         xe_assert(xe, xe_exec_queue_is_lr(q));
873         trace_xe_exec_queue_lr_cleanup(q);
874
875         wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
876
877         /* Kill the run_job / process_msg entry points */
878         xe_sched_submission_stop(sched);
879
880         /*
881          * Engine state now mostly stable, disable scheduling / deregister if
882          * needed. This cleanup routine might be called multiple times, where
883          * the actual async engine deregister drops the final engine ref.
884          * Calling disable_scheduling_deregister will mark the engine as
885          * destroyed and fire off the CT requests to disable scheduling /
886          * deregister, which we only want to do once. We also don't want to mark
887          * the engine as pending_disable again as this may race with the
888          * xe_guc_deregister_done_handler() which treats it as an unexpected
889          * state.
890          */
891         if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
892                 struct xe_guc *guc = exec_queue_to_guc(q);
893                 int ret;
894
895                 set_exec_queue_banned(q);
896                 disable_scheduling_deregister(guc, q);
897
898                 /*
899                  * Must wait for scheduling to be disabled before signalling
900                  * any fences; if the GT is broken, the GT reset code should signal us.
901                  */
902                 ret = wait_event_timeout(guc->ct.wq,
903                                          !exec_queue_pending_disable(q) ||
904                                          xe_guc_read_stopped(guc), HZ * 5);
905                 if (!ret) {
906                         drm_warn(&xe->drm, "Schedule disable failed to respond");
907                         xe_sched_submission_start(sched);
908                         xe_gt_reset_async(q->gt);
909                         return;
910                 }
911         }
912
913         xe_sched_submission_start(sched);
914 }
915
916 #define ADJUST_FIVE_PERCENT(__t)        mul_u64_u32_div(__t, 105, 100)
917
918 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
919 {
920         struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
921         u32 ctx_timestamp, ctx_job_timestamp;
922         u32 timeout_ms = q->sched_props.job_timeout_ms;
923         u32 diff;
924         u64 running_time_ms;
925
926         if (!xe_sched_job_started(job)) {
927                 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
928                            xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
929                            q->guc->id);
930
931                 return xe_sched_invalidate_job(job, 2);
932         }
933
934         ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
935         ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
936
937         /*
938          * Counter wraps at ~223s at the usual 19.2MHz; be paranoid and catch
939          * possible overflows with a high timeout.
940          */
941         xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
942
943         if (ctx_timestamp < ctx_job_timestamp)
944                 diff = ctx_timestamp + U32_MAX - ctx_job_timestamp;
945         else
946                 diff = ctx_timestamp - ctx_job_timestamp;
947
948         /*
949          * Ensure the timeout is within 5% to account for GuC scheduling latency
950          */
951         running_time_ms =
952                 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));
953
954         xe_gt_dbg(gt,
955                   "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
956                   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
957                   q->guc->id, running_time_ms, timeout_ms, diff);
958
959         return running_time_ms >= timeout_ms;
960 }
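/*
 * Illustrative sketch (not from the driver): the context timestamps are
 * free-running 32-bit counters, so the elapsed ticks must be derived with
 * wrap handling before converting to milliseconds and padding by 5% for GuC
 * scheduling latency, mirroring check_timeout() above. freq_hz stands in for
 * the GT clock (e.g. 19200000):
 *
 *	static u64 job_running_time_ms(u32 ctx_ts, u32 ctx_job_ts, u64 freq_hz)
 *	{
 *		u32 diff;
 *
 *		if (ctx_ts < ctx_job_ts)	// counter wrapped since job start
 *			diff = ctx_ts + U32_MAX - ctx_job_ts;
 *		else
 *			diff = ctx_ts - ctx_job_ts;
 *
 *		// ticks -> ms (xe_gt_clock_interval_to_ms() in the driver),
 *		// then allow 5% slack as ADJUST_FIVE_PERCENT() does
 *		return ((u64)diff * 1000 / freq_hz) * 105 / 100;
 *	}
 */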
961
962 static void enable_scheduling(struct xe_exec_queue *q)
963 {
964         MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
965         struct xe_guc *guc = exec_queue_to_guc(q);
966         int ret;
967
968         xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
969         xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
970         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
971         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
972
973         set_exec_queue_pending_enable(q);
974         set_exec_queue_enabled(q);
975         trace_xe_exec_queue_scheduling_enable(q);
976
977         xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
978                        G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
979
980         ret = wait_event_timeout(guc->ct.wq,
981                                  !exec_queue_pending_enable(q) ||
982                                  xe_guc_read_stopped(guc), HZ * 5);
983         if (!ret || xe_guc_read_stopped(guc)) {
984                 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
985                 set_exec_queue_banned(q);
986                 xe_gt_reset_async(q->gt);
987                 xe_sched_tdr_queue_imm(&q->guc->sched);
988         }
989 }
990
991 static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
992 {
993         MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
994         struct xe_guc *guc = exec_queue_to_guc(q);
995
996         xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
997         xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
998         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
999
1000         if (immediate)
1001                 set_min_preemption_timeout(guc, q);
1002         clear_exec_queue_enabled(q);
1003         set_exec_queue_pending_disable(q);
1004         trace_xe_exec_queue_scheduling_disable(q);
1005
1006         xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1007                        G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1008 }
1009
1010 static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1011 {
1012         u32 action[] = {
1013                 XE_GUC_ACTION_DEREGISTER_CONTEXT,
1014                 q->guc->id,
1015         };
1016
1017         xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1018         xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1019         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1020         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1021
1022         set_exec_queue_destroyed(q);
1023         trace_xe_exec_queue_deregister(q);
1024
1025         xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1026                        G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
1027 }
1028
1029 static enum drm_gpu_sched_stat
1030 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1031 {
1032         struct xe_sched_job *job = to_xe_sched_job(drm_job);
1033         struct xe_sched_job *tmp_job;
1034         struct xe_exec_queue *q = job->q;
1035         struct xe_gpu_scheduler *sched = &q->guc->sched;
1036         struct xe_guc *guc = exec_queue_to_guc(q);
1037         const char *process_name = "no process";
1038         struct xe_device *xe = guc_to_xe(guc);
1039         unsigned int fw_ref;
1040         int err = -ETIME;
1041         pid_t pid = -1;
1042         int i = 0;
1043         bool wedged, skip_timeout_check;
1044
1045         /*
1046          * The TDR has fired before the free-job worker. This is common if the exec
1047          * queue is closed immediately after its last fence signals. Add the job back
1048          * to the pending list so it can be freed and kick the scheduler, ensuring the
1049          * free job is not lost.
1050          */
1051         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
1052                 xe_sched_add_pending_job(sched, job);
1053                 xe_sched_submission_start(sched);
1054
1055                 return DRM_GPU_SCHED_STAT_NOMINAL;
1056         }
1057
1058         /* Kill the run_job entry point */
1059         xe_sched_submission_stop(sched);
1060
1061         /* Must check all state after stopping scheduler */
1062         skip_timeout_check = exec_queue_reset(q) ||
1063                 exec_queue_killed_or_banned_or_wedged(q) ||
1064                 exec_queue_destroyed(q);
1065
1066         /*
1067          * If a devcoredump hasn't been captured and the GuC capture for the job is
1068          * not ready, do a manual capture first and decide later if we need to use it.
1069          */
1070         if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
1071             !xe_guc_capture_get_matching_and_lock(job)) {
1072                 /* Take forcewake before manually capturing engine registers */
1073                 fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
1074                 if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
1075                         xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
1076
1077                 xe_engine_snapshot_capture_for_job(job);
1078
1079                 xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
1080         }
1081
1082         /*
1083          * XXX: Sampling timeout doesn't work in wedged mode as we have to
1084          * modify the scheduling state to read the timestamp. We could read the
1085          * timestamp from a register to accumulate the current running time, but this
1086          * doesn't work for SRIOV. For now, assume timeouts in wedged mode are
1087          * genuine timeouts.
1088          */
1089         wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1090
1091         /* Engine state now stable, disable scheduling to check timestamp */
1092         if (!wedged && exec_queue_registered(q)) {
1093                 int ret;
1094
1095                 if (exec_queue_reset(q))
1096                         err = -EIO;
1097
1098                 if (!exec_queue_destroyed(q)) {
1099                         /*
1100                          * Wait for any pending G2H to flush out before
1101                          * modifying state
1102                          */
1103                         ret = wait_event_timeout(guc->ct.wq,
1104                                                  (!exec_queue_pending_enable(q) &&
1105                                                   !exec_queue_pending_disable(q)) ||
1106                                                  xe_guc_read_stopped(guc), HZ * 5);
1107                         if (!ret || xe_guc_read_stopped(guc))
1108                                 goto trigger_reset;
1109
1110                         /*
1111                          * The flag communicates to the G2H handler that the schedule
1112                          * disable originated from a timeout check. The G2H handler then
1113                          * avoids triggering cleanup or deregistering the exec
1114                          * queue.
1115                          */
1116                         set_exec_queue_check_timeout(q);
1117                         disable_scheduling(q, skip_timeout_check);
1118                 }
1119
1120                 /*
1121                  * Must wait for scheduling to be disabled before signalling
1122                  * any fences; if the GT is broken, the GT reset code should signal us.
1123                  *
1124                  * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
1125                  * error) messages which can cause the schedule disable to get
1126                  * lost. If this occurs, trigger a GT reset to recover.
1127                  */
1128                 smp_rmb();
1129                 ret = wait_event_timeout(guc->ct.wq,
1130                                          !exec_queue_pending_disable(q) ||
1131                                          xe_guc_read_stopped(guc), HZ * 5);
1132                 if (!ret || xe_guc_read_stopped(guc)) {
1133 trigger_reset:
1134                         if (!ret)
1135                                 xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond");
1136                         set_exec_queue_extra_ref(q);
1137                         xe_exec_queue_get(q);   /* GT reset owns this */
1138                         set_exec_queue_banned(q);
1139                         xe_gt_reset_async(q->gt);
1140                         xe_sched_tdr_queue_imm(sched);
1141                         goto rearm;
1142                 }
1143         }
1144
1145         /*
1146          * Check if the job has actually timed out; if not, restart job execution and the TDR
1147          */
1148         if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
1149             !exec_queue_reset(q) && exec_queue_registered(q)) {
1150                 clear_exec_queue_check_timeout(q);
1151                 goto sched_enable;
1152         }
1153
1154         if (q->vm && q->vm->xef) {
1155                 process_name = q->vm->xef->process_name;
1156                 pid = q->vm->xef->pid;
1157         }
1158         xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1159                      xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1160                      q->guc->id, q->flags, process_name, pid);
1161
1162         trace_xe_sched_job_timedout(job);
1163
1164         if (!exec_queue_killed(q))
1165                 xe_devcoredump(job);
1166
1167         /*
1168          * Kernel jobs should never fail, nor should VM jobs; if they do,
1169          * something has gone wrong and the GT needs a reset
1170          */
1171         xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1172                    "Kernel-submitted job timed out\n");
1173         xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1174                    "VM job timed out on non-killed execqueue\n");
1175         if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1176                         (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1177                 if (!xe_sched_invalidate_job(job, 2)) {
1178                         clear_exec_queue_check_timeout(q);
1179                         xe_gt_reset_async(q->gt);
1180                         goto rearm;
1181                 }
1182         }
1183
1184         /* Finish cleaning up exec queue via deregister */
1185         set_exec_queue_banned(q);
1186         if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
1187                 set_exec_queue_extra_ref(q);
1188                 xe_exec_queue_get(q);
1189                 __deregister_exec_queue(guc, q);
1190         }
1191
1192         /* Stop fence signaling */
1193         xe_hw_fence_irq_stop(q->fence_irq);
1194
1195         /*
1196          * Fence state now stable, stop / start scheduler which cleans up any
1197          * fences that are complete
1198          */
1199         xe_sched_add_pending_job(sched, job);
1200         xe_sched_submission_start(sched);
1201
1202         xe_guc_exec_queue_trigger_cleanup(q);
1203
1204         /* Mark all outstanding jobs as bad, thus completing them */
1205         spin_lock(&sched->base.job_list_lock);
1206         list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
1207                 xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
1208         spin_unlock(&sched->base.job_list_lock);
1209
1210         /* Start fence signaling */
1211         xe_hw_fence_irq_start(q->fence_irq);
1212
1213         return DRM_GPU_SCHED_STAT_NOMINAL;
1214
1215 sched_enable:
1216         enable_scheduling(q);
1217 rearm:
1218         /*
1219          * XXX: Ideally we want to adjust the timeout based on the current execution
1220          * time, but there is currently no easy way to do that in the DRM scheduler. With
1221          * some thought, do this in a follow up.
1222          */
1223         xe_sched_add_pending_job(sched, job);
1224         xe_sched_submission_start(sched);
1225
1226         return DRM_GPU_SCHED_STAT_NOMINAL;
1227 }
1228
1229 static void __guc_exec_queue_fini_async(struct work_struct *w)
1230 {
1231         struct xe_guc_exec_queue *ge =
1232                 container_of(w, struct xe_guc_exec_queue, fini_async);
1233         struct xe_exec_queue *q = ge->q;
1234         struct xe_guc *guc = exec_queue_to_guc(q);
1235
1236         xe_pm_runtime_get(guc_to_xe(guc));
1237         trace_xe_exec_queue_destroy(q);
1238
1239         if (xe_exec_queue_is_lr(q))
1240                 cancel_work_sync(&ge->lr_tdr);
1241         release_guc_id(guc, q);
1242         xe_sched_entity_fini(&ge->entity);
1243         xe_sched_fini(&ge->sched);
1244
1245         kfree(ge);
1246         xe_exec_queue_fini(q);
1247         xe_pm_runtime_put(guc_to_xe(guc));
1248 }
1249
1250 static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
1251 {
1252         struct xe_guc *guc = exec_queue_to_guc(q);
1253         struct xe_device *xe = guc_to_xe(guc);
1254
1255         INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
1256
1257         /* We must block on kernel engines so slabs are empty on driver unload */
1258         if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1259                 __guc_exec_queue_fini_async(&q->guc->fini_async);
1260         else
1261                 queue_work(xe->destroy_wq, &q->guc->fini_async);
1262 }
1263
1264 static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
1265 {
1266         /*
1267          * Might be done from within the GPU scheduler; this needs to be async as we
1268          * fini the scheduler when the engine is fini'd and the scheduler can't
1269          * complete fini within itself (circular dependency). Async resolves
1270          * this, and we don't really care when everything is fini'd, just that it
1271          * is.
1272          */
1273         guc_exec_queue_fini_async(q);
1274 }
1275
1276 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1277 {
1278         struct xe_exec_queue *q = msg->private_data;
1279         struct xe_guc *guc = exec_queue_to_guc(q);
1280         struct xe_device *xe = guc_to_xe(guc);
1281
1282         xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1283         trace_xe_exec_queue_cleanup_entity(q);
1284
1285         if (exec_queue_registered(q))
1286                 disable_scheduling_deregister(guc, q);
1287         else
1288                 __guc_exec_queue_fini(guc, q);
1289 }
1290
1291 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1292 {
1293         return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1294 }
1295
1296 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1297 {
1298         struct xe_exec_queue *q = msg->private_data;
1299         struct xe_guc *guc = exec_queue_to_guc(q);
1300
1301         if (guc_exec_queue_allowed_to_change_state(q))
1302                 init_policies(guc, q);
1303         kfree(msg);
1304 }
1305
1306 static void __suspend_fence_signal(struct xe_exec_queue *q)
1307 {
1308         if (!q->guc->suspend_pending)
1309                 return;
1310
1311         WRITE_ONCE(q->guc->suspend_pending, false);
1312         wake_up(&q->guc->suspend_wait);
1313 }
1314
1315 static void suspend_fence_signal(struct xe_exec_queue *q)
1316 {
1317         struct xe_guc *guc = exec_queue_to_guc(q);
1318         struct xe_device *xe = guc_to_xe(guc);
1319
1320         xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
1321                   xe_guc_read_stopped(guc));
1322         xe_assert(xe, q->guc->suspend_pending);
1323
1324         __suspend_fence_signal(q);
1325 }
1326
1327 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1328 {
1329         struct xe_exec_queue *q = msg->private_data;
1330         struct xe_guc *guc = exec_queue_to_guc(q);
1331
1332         if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1333             exec_queue_enabled(q)) {
1334                 wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
1335                            xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));
1336
1337                 if (!xe_guc_read_stopped(guc)) {
1338                         s64 since_resume_ms =
1339                                 ktime_ms_delta(ktime_get(),
1340                                                q->guc->resume_time);
1341                         s64 wait_ms = q->vm->preempt.min_run_period_ms -
1342                                 since_resume_ms;
1343
1344                         if (wait_ms > 0 && q->guc->resume_time)
1345                                 msleep(wait_ms);
1346
1347                         set_exec_queue_suspended(q);
1348                         disable_scheduling(q, false);
1349                 }
1350         } else if (q->guc->suspend_pending) {
1351                 set_exec_queue_suspended(q);
1352                 suspend_fence_signal(q);
1353         }
1354 }
1355
1356 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1357 {
1358         struct xe_exec_queue *q = msg->private_data;
1359
1360         if (guc_exec_queue_allowed_to_change_state(q)) {
1361                 clear_exec_queue_suspended(q);
1362                 if (!exec_queue_enabled(q)) {
1363                         q->guc->resume_time = RESUME_PENDING;
1364                         enable_scheduling(q);
1365                 }
1366         } else {
1367                 clear_exec_queue_suspended(q);
1368         }
1369 }
1370
1371 #define CLEANUP         1       /* Non-zero values to catch uninitialized msg */
1372 #define SET_SCHED_PROPS 2
1373 #define SUSPEND         3
1374 #define RESUME          4
1375 #define OPCODE_MASK     0xf
1376 #define MSG_LOCKED      BIT(8)
1377
1378 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1379 {
1380         struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1381
1382         trace_xe_sched_msg_recv(msg);
1383
1384         switch (msg->opcode) {
1385         case CLEANUP:
1386                 __guc_exec_queue_process_msg_cleanup(msg);
1387                 break;
1388         case SET_SCHED_PROPS:
1389                 __guc_exec_queue_process_msg_set_sched_props(msg);
1390                 break;
1391         case SUSPEND:
1392                 __guc_exec_queue_process_msg_suspend(msg);
1393                 break;
1394         case RESUME:
1395                 __guc_exec_queue_process_msg_resume(msg);
1396                 break;
1397         default:
1398                 XE_WARN_ON("Unknown message type");
1399         }
1400
1401         xe_pm_runtime_put(xe);
1402 }
1403
1404 static const struct drm_sched_backend_ops drm_sched_ops = {
1405         .run_job = guc_exec_queue_run_job,
1406         .free_job = guc_exec_queue_free_job,
1407         .timedout_job = guc_exec_queue_timedout_job,
1408 };
1409
1410 static const struct xe_sched_backend_ops xe_sched_ops = {
1411         .process_msg = guc_exec_queue_process_msg,
1412 };
1413
1414 static int guc_exec_queue_init(struct xe_exec_queue *q)
1415 {
1416         struct xe_gpu_scheduler *sched;
1417         struct xe_guc *guc = exec_queue_to_guc(q);
1418         struct xe_device *xe = guc_to_xe(guc);
1419         struct xe_guc_exec_queue *ge;
1420         long timeout;
1421         int err, i;
1422
1423         xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
1424
1425         ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1426         if (!ge)
1427                 return -ENOMEM;
1428
1429         q->guc = ge;
1430         ge->q = q;
1431         init_waitqueue_head(&ge->suspend_wait);
1432
1433         for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1434                 INIT_LIST_HEAD(&ge->static_msgs[i].link);
1435
1436         timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1437                   msecs_to_jiffies(q->sched_props.job_timeout_ms);
1438         err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1439                             NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
1440                             timeout, guc_to_gt(guc)->ordered_wq, NULL,
1441                             q->name, gt_to_xe(q->gt)->drm.dev);
1442         if (err)
1443                 goto err_free;
1444
1445         sched = &ge->sched;
1446         err = xe_sched_entity_init(&ge->entity, sched);
1447         if (err)
1448                 goto err_sched;
1449
1450         if (xe_exec_queue_is_lr(q))
1451                 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
1452
1453         mutex_lock(&guc->submission_state.lock);
1454
1455         err = alloc_guc_id(guc, q);
1456         if (err)
1457                 goto err_entity;
1458
1459         q->entity = &ge->entity;
1460
1461         if (xe_guc_read_stopped(guc))
1462                 xe_sched_stop(sched);
1463
1464         mutex_unlock(&guc->submission_state.lock);
1465
1466         xe_exec_queue_assign_name(q, q->guc->id);
1467
1468         trace_xe_exec_queue_create(q);
1469
1470         return 0;
1471
1472 err_entity:
1473         mutex_unlock(&guc->submission_state.lock);
1474         xe_sched_entity_fini(&ge->entity);
1475 err_sched:
1476         xe_sched_fini(&ge->sched);
1477 err_free:
1478         kfree(ge);
1479
1480         return err;
1481 }
1482
1483 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1484 {
1485         trace_xe_exec_queue_kill(q);
1486         set_exec_queue_killed(q);
1487         __suspend_fence_signal(q);
1488         xe_guc_exec_queue_trigger_cleanup(q);
1489 }
1490
1491 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1492                                    u32 opcode)
1493 {
1494         xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
1495
1496         INIT_LIST_HEAD(&msg->link);
1497         msg->opcode = opcode & OPCODE_MASK;
1498         msg->private_data = q;
1499
1500         trace_xe_sched_msg_add(msg);
1501         if (opcode & MSG_LOCKED)
1502                 xe_sched_add_msg_locked(&q->guc->sched, msg);
1503         else
1504                 xe_sched_add_msg(&q->guc->sched, msg);
1505 }
1506
1507 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
1508                                        struct xe_sched_msg *msg,
1509                                        u32 opcode)
1510 {
1511         if (!list_empty(&msg->link))
1512                 return false;
1513
1514         guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
1515
1516         return true;
1517 }
1518
1519 #define STATIC_MSG_CLEANUP      0
1520 #define STATIC_MSG_SUSPEND      1
1521 #define STATIC_MSG_RESUME       2
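/*
 * Each exec queue embeds a small array of static messages (initialized in
 * guc_exec_queue_init()) so that cleanup, suspend and resume never need a
 * runtime allocation; guc_exec_queue_try_add_msg() uses list_empty() on
 * msg->link to detect a static message that is already queued.
 */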
1522 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1523 {
1524         struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1525
1526         if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1527                 guc_exec_queue_add_msg(q, msg, CLEANUP);
1528         else
1529                 __guc_exec_queue_fini(exec_queue_to_guc(q), q);
1530 }
1531
1532 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1533                                        enum xe_exec_queue_priority priority)
1534 {
1535         struct xe_sched_msg *msg;
1536
1537         if (q->sched_props.priority == priority ||
1538             exec_queue_killed_or_banned_or_wedged(q))
1539                 return 0;
1540
1541         msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1542         if (!msg)
1543                 return -ENOMEM;
1544
1545         q->sched_props.priority = priority;
1546         guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1547
1548         return 0;
1549 }
1550
1551 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1552 {
1553         struct xe_sched_msg *msg;
1554
1555         if (q->sched_props.timeslice_us == timeslice_us ||
1556             exec_queue_killed_or_banned_or_wedged(q))
1557                 return 0;
1558
1559         msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1560         if (!msg)
1561                 return -ENOMEM;
1562
1563         q->sched_props.timeslice_us = timeslice_us;
1564         guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1565
1566         return 0;
1567 }
1568
1569 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1570                                               u32 preempt_timeout_us)
1571 {
1572         struct xe_sched_msg *msg;
1573
1574         if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1575             exec_queue_killed_or_banned_or_wedged(q))
1576                 return 0;
1577
1578         msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1579         if (!msg)
1580                 return -ENOMEM;
1581
1582         q->sched_props.preempt_timeout_us = preempt_timeout_us;
1583         guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1584
1585         return 0;
1586 }
1587
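/*
 * Suspend is asynchronous: guc_exec_queue_suspend() only queues the static
 * SUSPEND message and sets suspend_pending. Callers that need the suspend to
 * have taken effect must pair it with guc_exec_queue_suspend_wait(), e.g.
 * (illustrative, via the exec queue ops):
 *
 *	err = q->ops->suspend(q);
 *	if (!err)
 *		err = q->ops->suspend_wait(q);
 */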
1588 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1589 {
1590         struct xe_gpu_scheduler *sched = &q->guc->sched;
1591         struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1592
1593         if (exec_queue_killed_or_banned_or_wedged(q))
1594                 return -EINVAL;
1595
1596         xe_sched_msg_lock(sched);
1597         if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
1598                 q->guc->suspend_pending = true;
1599         xe_sched_msg_unlock(sched);
1600
1601         return 0;
1602 }
1603
1604 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1605 {
1606         struct xe_guc *guc = exec_queue_to_guc(q);
1607         int ret;
1608
1609         /*
1610          * We likely don't need to check exec_queue_killed() since we clear
1611          * suspend_pending upon kill, but check it here anyway to guard
1612          * against races in which suspend_pending is set after the kill.
1613          */
1614         ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
1615                                                !READ_ONCE(q->guc->suspend_pending) ||
1616                                                exec_queue_killed(q) ||
1617                                                xe_guc_read_stopped(guc),
1618                                                HZ * 5);
1619
1620         if (!ret) {
1621                 xe_gt_warn(guc_to_gt(guc),
1622                            "Suspend fence, guc_id=%d, failed to respond",
1623                            q->guc->id);
1624                 /* XXX: Trigger GT reset? */
1625                 return -ETIME;
1626         }
1627
1628         return ret < 0 ? ret : 0;
1629 }
1630
1631 static void guc_exec_queue_resume(struct xe_exec_queue *q)
1632 {
1633         struct xe_gpu_scheduler *sched = &q->guc->sched;
1634         struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1635         struct xe_guc *guc = exec_queue_to_guc(q);
1636         struct xe_device *xe = guc_to_xe(guc);
1637
1638         xe_assert(xe, !q->guc->suspend_pending);
1639
1640         xe_sched_msg_lock(sched);
1641         guc_exec_queue_try_add_msg(q, msg, RESUME);
1642         xe_sched_msg_unlock(sched);
1643 }
1644
1645 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1646 {
1647         return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1648 }
1649
1650 /*
1651  * All of these functions are an abstraction layer which other parts of XE can
1652  * use to trap into the GuC backend. All of these functions, aside from init,
1653  * really shouldn't do much other than trap into the DRM scheduler which
1654  * synchronizes these operations.
1655  */
1656 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1657         .init = guc_exec_queue_init,
1658         .kill = guc_exec_queue_kill,
1659         .fini = guc_exec_queue_fini,
1660         .set_priority = guc_exec_queue_set_priority,
1661         .set_timeslice = guc_exec_queue_set_timeslice,
1662         .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1663         .suspend = guc_exec_queue_suspend,
1664         .suspend_wait = guc_exec_queue_suspend_wait,
1665         .resume = guc_exec_queue_resume,
1666         .reset_status = guc_exec_queue_reset_status,
1667 };
1668
1669 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1670 {
1671         struct xe_gpu_scheduler *sched = &q->guc->sched;
1672
1673         /* Stop scheduling + flush any DRM scheduler operations */
1674         xe_sched_submission_stop(sched);
1675
1676         /* Clean up lost G2H + reset engine state */
1677         if (exec_queue_registered(q)) {
1678                 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1679                         xe_exec_queue_put(q);
1680                 else if (exec_queue_destroyed(q))
1681                         __guc_exec_queue_fini(guc, q);
1682         }
1683         if (q->guc->suspend_pending) {
1684                 set_exec_queue_suspended(q);
1685                 suspend_fence_signal(q);
1686         }
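        /*
         * Keep only the "sticky" state bits (wedged, banned, killed, destroyed,
         * suspended) across the stop; transient bits such as registered,
         * enabled and the pending enable/disable flags are cleared so the
         * queue starts from a clean scheduling state when submission is
         * restarted.
         */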
1687         atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
1688                    EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
1689                    EXEC_QUEUE_STATE_SUSPENDED,
1690                    &q->guc->state);
1691         q->guc->resume_time = 0;
1692         trace_xe_exec_queue_stop(q);
1693
1694         /*
1695          * Ban any engine (aside from kernel engines and engines used for VM
1696          * ops) that has a started but not completed job, or whose job has gone
1697          * through a GT reset more than twice.
1698          */
1699         if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1700                 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1701                 bool ban = false;
1702
1703                 if (job) {
1704                         if ((xe_sched_job_started(job) &&
1705                             !xe_sched_job_completed(job)) ||
1706                             xe_sched_invalidate_job(job, 2)) {
1707                                 trace_xe_sched_job_ban(job);
1708                                 ban = true;
1709                         }
1710                 } else if (xe_exec_queue_is_lr(q) &&
1711                            (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
1712                         ban = true;
1713                 }
1714
1715                 if (ban) {
1716                         set_exec_queue_banned(q);
1717                         xe_guc_exec_queue_trigger_cleanup(q);
1718                 }
1719         }
1720 }
1721
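/*
 * GT reset flow (informational sketch): xe_guc_submit_reset_prepare() marks
 * submission as stopped and wakes any CT waiters, xe_guc_submit_stop()
 * quiesces every exec queue, the GT/GuC reset itself happens elsewhere, and
 * xe_guc_submit_start() clears the stopped state and resubmits pending jobs.
 */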
1722 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1723 {
1724         int ret;
1725
1726         /*
1727          * Using an atomic here rather than submission_state.lock as this
1728          * function can be called while holding the CT lock (engine reset
1729          * failure). submission_state.lock needs the CT lock to resubmit jobs.
1730          * An atomic is not ideal, but it works to protect against a concurrent
1731          * reset and to release any TDRs waiting on guc->submission_state.stopped.
1732          */
1733         ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1734         smp_wmb();
1735         wake_up_all(&guc->ct.wq);
1736
1737         return ret;
1738 }
1739
1740 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1741 {
1742         wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
1743                    !xe_guc_read_stopped(guc));
1744 }
1745
1746 void xe_guc_submit_stop(struct xe_guc *guc)
1747 {
1748         struct xe_exec_queue *q;
1749         unsigned long index;
1750         struct xe_device *xe = guc_to_xe(guc);
1751
1752         xe_assert(xe, xe_guc_read_stopped(guc) == 1);
1753
1754         mutex_lock(&guc->submission_state.lock);
1755
1756         xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1757                 /* Prevent redundant attempts to stop parallel queues */
1758                 if (q->guc->id != index)
1759                         continue;
1760
1761                 guc_exec_queue_stop(guc, q);
1762         }
1763
1764         mutex_unlock(&guc->submission_state.lock);
1765
1766         /*
1767          * No one can enter the backend at this point, aside from new engine
1768          * creation which is protected by guc->submission_state.lock.
1769          */
1770
1771 }
1772
1773 static void guc_exec_queue_start(struct xe_exec_queue *q)
1774 {
1775         struct xe_gpu_scheduler *sched = &q->guc->sched;
1776
1777         if (!exec_queue_killed_or_banned_or_wedged(q)) {
1778                 int i;
1779
1780                 trace_xe_exec_queue_resubmit(q);
1781                 for (i = 0; i < q->width; ++i)
1782                         xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
1783                 xe_sched_resubmit_jobs(sched);
1784         }
1785
1786         xe_sched_submission_start(sched);
1787         xe_sched_submission_resume_tdr(sched);
1788 }
1789
1790 int xe_guc_submit_start(struct xe_guc *guc)
1791 {
1792         struct xe_exec_queue *q;
1793         unsigned long index;
1794         struct xe_device *xe = guc_to_xe(guc);
1795
1796         xe_assert(xe, xe_guc_read_stopped(guc) == 1);
1797
1798         mutex_lock(&guc->submission_state.lock);
1799         atomic_dec(&guc->submission_state.stopped);
1800         xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
1801                 /* Prevent redundant attempts to start parallel queues */
1802                 if (q->guc->id != index)
1803                         continue;
1804
1805                 guc_exec_queue_start(q);
1806         }
1807         mutex_unlock(&guc->submission_state.lock);
1808
1809         wake_up_all(&guc->ct.wq);
1810
1811         return 0;
1812 }
1813
1814 static struct xe_exec_queue *
1815 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
1816 {
1817         struct xe_device *xe = guc_to_xe(guc);
1818         struct xe_exec_queue *q;
1819
1820         if (unlikely(guc_id >= GUC_ID_MAX)) {
1821                 drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
1822                 return NULL;
1823         }
1824
1825         q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
1826         if (unlikely(!q)) {
1827                 drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
1828                 return NULL;
1829         }
1830
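        /*
         * For parallel queues a G2H message may reference any guc_id in the
         * [q->guc->id, q->guc->id + q->width) range; sanity-check that the
         * queue returned by the lookup actually owns this id.
         */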
1831         xe_assert(xe, guc_id >= q->guc->id);
1832         xe_assert(xe, guc_id < (q->guc->id + q->width));
1833
1834         return q;
1835 }
1836
1837 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1838 {
1839         u32 action[] = {
1840                 XE_GUC_ACTION_DEREGISTER_CONTEXT,
1841                 q->guc->id,
1842         };
1843
1844         xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
1845         xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1846         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1847         xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1848
1849         trace_xe_exec_queue_deregister(q);
1850
1851         xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1852 }
1853
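/*
 * handle_sched_done() processes both directions of a scheduling change:
 * runnable_state == 1 acknowledges a pending enable (resume_time is stamped
 * and CT waiters are woken), while runnable_state == 0 acknowledges a pending
 * disable, which may additionally complete a suspend, unblock a ban/timeout
 * check, or kick off context deregistration if the queue was destroyed.
 */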
1854 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
1855                               u32 runnable_state)
1856 {
1857         trace_xe_exec_queue_scheduling_done(q);
1858
1859         if (runnable_state == 1) {
1860                 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
1861
1862                 q->guc->resume_time = ktime_get();
1863                 clear_exec_queue_pending_enable(q);
1864                 smp_wmb();
1865                 wake_up_all(&guc->ct.wq);
1866         } else {
1867                 bool check_timeout = exec_queue_check_timeout(q);
1868
1869                 xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
1870                 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
1871
1872                 if (q->guc->suspend_pending) {
1873                         suspend_fence_signal(q);
1874                         clear_exec_queue_pending_disable(q);
1875                 } else {
1876                         if (exec_queue_banned(q) || check_timeout) {
1877                                 smp_wmb();
1878                                 wake_up_all(&guc->ct.wq);
1879                         }
1880                         if (!check_timeout && exec_queue_destroyed(q)) {
1881                                 /*
1882                                  * Make sure to clear the pending_disable only
1883                                  * after sampling the destroyed state. We want
1884                                  * to ensure we don't trigger the unregister too
1885                                  * early with something intending to only
1886                                  * disable scheduling. The caller doing the
1887                                  * destroy must wait for an ongoing
1888                                  * pending_disable before marking as destroyed.
1889                                  */
1890                                 clear_exec_queue_pending_disable(q);
1891                                 deregister_exec_queue(guc, q);
1892                         } else {
1893                                 clear_exec_queue_pending_disable(q);
1894                         }
1895                 }
1896         }
1897 }
1898
1899 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1900 {
1901         struct xe_device *xe = guc_to_xe(guc);
1902         struct xe_exec_queue *q;
1903         u32 guc_id = msg[0];
1904         u32 runnable_state = msg[1];
1905
1906         if (unlikely(len < 2)) {
1907                 drm_err(&xe->drm, "Invalid length %u", len);
1908                 return -EPROTO;
1909         }
1910
1911         q = g2h_exec_queue_lookup(guc, guc_id);
1912         if (unlikely(!q))
1913                 return -EPROTO;
1914
1915         if (unlikely(!exec_queue_pending_enable(q) &&
1916                      !exec_queue_pending_disable(q))) {
1917                 xe_gt_err(guc_to_gt(guc),
1918                           "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
1919                           atomic_read(&q->guc->state), q->guc->id,
1920                           runnable_state);
1921                 return -EPROTO;
1922         }
1923
1924         handle_sched_done(guc, q, runnable_state);
1925
1926         return 0;
1927 }
1928
1929 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
1930 {
1931         trace_xe_exec_queue_deregister_done(q);
1932
1933         clear_exec_queue_registered(q);
1934
1935         if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1936                 xe_exec_queue_put(q);
1937         else
1938                 __guc_exec_queue_fini(guc, q);
1939 }
1940
1941 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1942 {
1943         struct xe_device *xe = guc_to_xe(guc);
1944         struct xe_exec_queue *q;
1945         u32 guc_id = msg[0];
1946
1947         if (unlikely(len < 1)) {
1948                 drm_err(&xe->drm, "Invalid length %u", len);
1949                 return -EPROTO;
1950         }
1951
1952         q = g2h_exec_queue_lookup(guc, guc_id);
1953         if (unlikely(!q))
1954                 return -EPROTO;
1955
1956         if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
1957             exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
1958                 xe_gt_err(guc_to_gt(guc),
1959                           "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
1960                           atomic_read(&q->guc->state), q->guc->id);
1961                 return -EPROTO;
1962         }
1963
1964         handle_deregister_done(guc, q);
1965
1966         return 0;
1967 }
1968
1969 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
1970 {
1971         struct xe_gt *gt = guc_to_gt(guc);
1972         struct xe_device *xe = guc_to_xe(guc);
1973         struct xe_exec_queue *q;
1974         u32 guc_id = msg[0];
1975
1976         if (unlikely(len < 1)) {
1977                 drm_err(&xe->drm, "Invalid length %u", len);
1978                 return -EPROTO;
1979         }
1980
1981         q = g2h_exec_queue_lookup(guc, guc_id);
1982         if (unlikely(!q))
1983                 return -EPROTO;
1984
1985         xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
1986                    xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
1987
1988         trace_xe_exec_queue_reset(q);
1989
1990         /*
1991          * A banned engine is a NOP at this point (came from
1992          * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
1993          * cancel the jobs by setting the job timeout to the minimum value, which
1994          * triggers guc_exec_queue_timedout_job.
1995          */
1996         set_exec_queue_reset(q);
1997         if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
1998                 xe_guc_exec_queue_trigger_cleanup(q);
1999
2000         return 0;
2001 }
2002
2003 /**
2004  * xe_guc_error_capture_handler - Handler of the GuC error-capture message
2005  * @guc: The GuC object
2006  * @msg: Pointer to the message
2007  * @len: The message length
2008  *
2009  * When GuC captured data is ready, the GuC sends a
2010  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host; this function
2011  * is called first to check the status before processing the accompanying data.
2012  *
2013  * Returns: 0 on success, negative error code on failure.
2014  */
2015 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
2016 {
2017         u32 status;
2018
2019         if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) {
2020                 xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len);
2021                 return -EPROTO;
2022         }
2023
2024         status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
2025         if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
2026                 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
2027
2028         xe_guc_capture_process(guc);
2029
2030         return 0;
2031 }
2032
2033 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2034                                                u32 len)
2035 {
2036         struct xe_gt *gt = guc_to_gt(guc);
2037         struct xe_device *xe = guc_to_xe(guc);
2038         struct xe_exec_queue *q;
2039         u32 guc_id = msg[0];
2040
2041         if (unlikely(len < 1)) {
2042                 drm_err(&xe->drm, "Invalid length %u", len);
2043                 return -EPROTO;
2044         }
2045
2046         q = g2h_exec_queue_lookup(guc, guc_id);
2047         if (unlikely(!q))
2048                 return -EPROTO;
2049
2050         xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
2051                   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2052
2053         trace_xe_exec_queue_memory_cat_error(q);
2054
2055         /* Treat the same as engine reset */
2056         set_exec_queue_reset(q);
2057         if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2058                 xe_guc_exec_queue_trigger_cleanup(q);
2059
2060         return 0;
2061 }
2062
2063 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2064 {
2065         struct xe_device *xe = guc_to_xe(guc);
2066         u8 guc_class, instance;
2067         u32 reason;
2068
2069         if (unlikely(len != 3)) {
2070                 drm_err(&xe->drm, "Invalid length %u", len);
2071                 return -EPROTO;
2072         }
2073
2074         guc_class = msg[0];
2075         instance = msg[1];
2076         reason = msg[2];
2077
2078         /* Unexpected failure of a hardware feature, log an actual error */
2079         drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
2080                 guc_class, instance, reason);
2081
2082         xe_gt_reset_async(guc_to_gt(guc));
2083
2084         return 0;
2085 }
2086
2087 static void
2088 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
2089                                    struct xe_guc_submit_exec_queue_snapshot *snapshot)
2090 {
2091         struct xe_guc *guc = exec_queue_to_guc(q);
2092         struct xe_device *xe = guc_to_xe(guc);
2093         struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
2094         int i;
2095
2096         snapshot->guc.wqi_head = q->guc->wqi_head;
2097         snapshot->guc.wqi_tail = q->guc->wqi_tail;
2098         snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
2099         snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
2100         snapshot->parallel.wq_desc.status = parallel_read(xe, map,
2101                                                           wq_desc.wq_status);
2102
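        /*
         * The work queue is a ring of u32 entries: walk from head to tail in
         * u32 steps, wrapping at WQ_SIZE, and copy out only the occupied slots
         * for the snapshot.
         */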
2103         if (snapshot->parallel.wq_desc.head !=
2104             snapshot->parallel.wq_desc.tail) {
2105                 for (i = snapshot->parallel.wq_desc.head;
2106                      i != snapshot->parallel.wq_desc.tail;
2107                      i = (i + sizeof(u32)) % WQ_SIZE)
2108                         snapshot->parallel.wq[i / sizeof(u32)] =
2109                                 parallel_read(xe, map, wq[i / sizeof(u32)]);
2110         }
2111 }
2112
2113 static void
2114 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2115                                  struct drm_printer *p)
2116 {
2117         int i;
2118
2119         drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
2120                    snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
2121         drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
2122                    snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
2123         drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
2124
2125         if (snapshot->parallel.wq_desc.head !=
2126             snapshot->parallel.wq_desc.tail) {
2127                 for (i = snapshot->parallel.wq_desc.head;
2128                      i != snapshot->parallel.wq_desc.tail;
2129                      i = (i + sizeof(u32)) % WQ_SIZE)
2130                         drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
2131                                    snapshot->parallel.wq[i / sizeof(u32)]);
2132         }
2133 }
2134
2135 /**
2136  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
2137  * @q: faulty exec queue
2138  *
2139  * This can be printed out at a later stage, for example during dev_coredump
2140  * analysis.
2141  *
2142  * Returns: a GuC Submit Engine snapshot object that must be freed by the
2143  * caller, using `xe_guc_exec_queue_snapshot_free`.
2144  */
2145 struct xe_guc_submit_exec_queue_snapshot *
2146 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
2147 {
2148         struct xe_gpu_scheduler *sched = &q->guc->sched;
2149         struct xe_guc_submit_exec_queue_snapshot *snapshot;
2150         int i;
2151
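        /*
         * GFP_ATOMIC throughout: the capture can run from the job timeout /
         * fence signaling path where sleeping is not allowed; anything that
         * needs locks or sleeping allocations is deferred to
         * xe_guc_exec_queue_snapshot_capture_delayed().
         */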
2152         snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
2153
2154         if (!snapshot)
2155                 return NULL;
2156
2157         snapshot->guc.id = q->guc->id;
2158         memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
2159         snapshot->class = q->class;
2160         snapshot->logical_mask = q->logical_mask;
2161         snapshot->width = q->width;
2162         snapshot->refcount = kref_read(&q->refcount);
2163         snapshot->sched_timeout = sched->base.timeout;
2164         snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
2165         snapshot->sched_props.preempt_timeout_us =
2166                 q->sched_props.preempt_timeout_us;
2167
2168         snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
2169                                       GFP_ATOMIC);
2170
2171         if (snapshot->lrc) {
2172                 for (i = 0; i < q->width; ++i) {
2173                         struct xe_lrc *lrc = q->lrc[i];
2174
2175                         snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
2176                 }
2177         }
2178
2179         snapshot->schedule_state = atomic_read(&q->guc->state);
2180         snapshot->exec_queue_flags = q->flags;
2181
2182         snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
2183         if (snapshot->parallel_execution)
2184                 guc_exec_queue_wq_snapshot_capture(q, snapshot);
2185
2186         spin_lock(&sched->base.job_list_lock);
2187         snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
2188         snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
2189                                                sizeof(struct pending_list_snapshot),
2190                                                GFP_ATOMIC);
2191
2192         if (snapshot->pending_list) {
2193                 struct xe_sched_job *job_iter;
2194
2195                 i = 0;
2196                 list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
2197                         snapshot->pending_list[i].seqno =
2198                                 xe_sched_job_seqno(job_iter);
2199                         snapshot->pending_list[i].fence =
2200                                 dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
2201                         snapshot->pending_list[i].finished =
2202                                 dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
2203                                 ? 1 : 0;
2204                         i++;
2205                 }
2206         }
2207
2208         spin_unlock(&sched->base.job_list_lock);
2209
2210         return snapshot;
2211 }
2212
2213 /**
2214  * xe_guc_exec_queue_snapshot_capture_delayed - Take the delayed part of the GuC Engine snapshot.
2215  * @snapshot: Previously captured snapshot of the job.
2216  *
2217  * This captures data that requires taking locks, so it cannot be done in the signaling path.
2218  */
2219 void
2220 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2221 {
2222         int i;
2223
2224         if (!snapshot || !snapshot->lrc)
2225                 return;
2226
2227         for (i = 0; i < snapshot->width; ++i)
2228                 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
2229 }
2230
2231 /**
2232  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
2233  * @snapshot: GuC Submit Engine snapshot object.
2234  * @p: drm_printer where it will be printed out.
2235  *
2236  * This function prints out a given GuC Submit Engine snapshot object.
2237  */
2238 void
2239 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2240                                  struct drm_printer *p)
2241 {
2242         int i;
2243
2244         if (!snapshot)
2245                 return;
2246
2247         drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
2248         drm_printf(p, "\tName: %s\n", snapshot->name);
2249         drm_printf(p, "\tClass: %d\n", snapshot->class);
2250         drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
2251         drm_printf(p, "\tWidth: %d\n", snapshot->width);
2252         drm_printf(p, "\tRef: %d\n", snapshot->refcount);
2253         drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
2254         drm_printf(p, "\tTimeslice: %u (us)\n",
2255                    snapshot->sched_props.timeslice_us);
2256         drm_printf(p, "\tPreempt timeout: %u (us)\n",
2257                    snapshot->sched_props.preempt_timeout_us);
2258
2259         for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
2260                 xe_lrc_snapshot_print(snapshot->lrc[i], p);
2261
2262         drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
2263         drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
2264
2265         if (snapshot->parallel_execution)
2266                 guc_exec_queue_wq_snapshot_print(snapshot, p);
2267
2268         for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
2269              i++)
2270                 drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
2271                            snapshot->pending_list[i].seqno,
2272                            snapshot->pending_list[i].fence,
2273                            snapshot->pending_list[i].finished);
2274 }
2275
2276 /**
2277  * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
2278  * snapshot.
2279  * @snapshot: GuC Submit Engine snapshot object.
2280  *
2281  * This function frees all the memory that was allocated at capture
2282  * time.
2283  */
2284 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2285 {
2286         int i;
2287
2288         if (!snapshot)
2289                 return;
2290
2291         if (snapshot->lrc) {
2292                 for (i = 0; i < snapshot->width; i++)
2293                         xe_lrc_snapshot_free(snapshot->lrc[i]);
2294                 kfree(snapshot->lrc);
2295         }
2296         kfree(snapshot->pending_list);
2297         kfree(snapshot);
2298 }
2299
2300 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
2301 {
2302         struct xe_guc_submit_exec_queue_snapshot *snapshot;
2303
2304         snapshot = xe_guc_exec_queue_snapshot_capture(q);
2305         xe_guc_exec_queue_snapshot_print(snapshot, p);
2306         xe_guc_exec_queue_snapshot_free(snapshot);
2307 }
2308
2309 /**
2310  * xe_guc_submit_print - GuC Submit Print.
2311  * @guc: GuC.
2312  * @p: drm_printer where it will be printed out.
2313  *
2314  * This function captures and prints snapshots of **all** GuC Engines.
2315  */
2316 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2317 {
2318         struct xe_exec_queue *q;
2319         unsigned long index;
2320
2321         if (!xe_device_uc_enabled(guc_to_xe(guc)))
2322                 return;
2323
2324         mutex_lock(&guc->submission_state.lock);
2325         xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2326                 guc_exec_queue_print(q, p);
2327         mutex_unlock(&guc->submission_state.lock);
2328 }