// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"
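
/*
 * Render-engine flush for gen8: translate the EMIT_FLUSH/EMIT_INVALIDATE
 * request into a PIPE_CONTROL flags word, then emit it together with the
 * extra dummy/DC-flush PIPE_CONTROLs demanded by the gen9 and KBL
 * workarounds below.
 */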
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (GRAPHICS_VER(rq->engine->i915) == 9)
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, cs);

	return 0;
}
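
/*
 * Non-render engines flush via MI_FLUSH_DW instead of PIPE_CONTROL, with a
 * post-sync write into the per-context scratch slot so that later commands
 * (e.g. the breadcrumb) are ordered behind the flush.
 */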
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(rq, cs);

	return 0;
}
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}
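
/*
 * Gen12+ MI_ARB_CHECK carries an inline pre-parser control: bit 8 selects
 * the pre-fetch-disable encoding and bit 0 the new state. Used below to
 * keep the pre-parser from racing ahead of TLB invalidations.
 */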
static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | 1 << 8 | state;
}
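
/*
 * Invalidate the AUX table by writing the engine's *_AUX_NV register with an
 * LRI; gsi_offset relocates the MMIO address on GTs with a non-zero GSI base
 * (standalone media).
 */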
u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
{
	u32 gsi_offset = gt->uncore->gsi_offset;

	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = AUX_INV;
	*cs++ = MI_NOOP;

	return cs;
}
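
/*
 * Early MTL steppings want a dummy PIPE_CONTROL with a depth-cache flush
 * emitted ahead of the real flush/invalidate; a no-op everywhere else.
 */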
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
	if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
	    IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
		u32 *cs;

		/* dummy PIPE_CONTROL + depth flush */
		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs,
					     0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}
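
/*
 * Gen12 render/compute flush: same idea as gen8, but using the extended
 * PIPE_CONTROL dword0 (HDC pipeline flush), masking out 3D-only flags on
 * parts without a 3D pipeline or on compute engines, and bracketing the
 * invalidation with a pre-parser disable plus an AUX table invalidation on
 * non flat-CCS platforms.
 */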
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	if (mode & EMIT_FLUSH) {
		u32 flags = 0;
		u32 *cs;
		int err;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_FLUSH_L3;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		flags |= PIPE_CONTROL_DEPTH_STALL;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs,
					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
					     flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 flags = 0;
		u32 *cs, count;
		int err;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		if (!HAS_FLAT_CCS(rq->engine->i915))
			count = 8 + 4;
		else
			count = 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Prevent the pre-parser from skipping past the TLB
		 * invalidate and loading a stale page for the batch
		 * buffer / request payload.
		 */
		*cs++ = preparser_disable(true);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		if (!HAS_FLAT_CCS(rq->engine->i915)) {
			/* hsdes: 1809175790 */
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_GFX_CCS_AUX_NV);
		}

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
	}

	return 0;
}
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	intel_engine_mask_t aux_inv = 0;
	u32 cmd, *cs;

	cmd = 4;
	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (!HAS_FLAT_CCS(rq->engine->i915) &&
		    (rq->engine->class == VIDEO_DECODE_CLASS ||
		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
			aux_inv = rq->engine->mask &
				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
			if (aux_inv)
				cmd += 4;
		}
	}

	cs = intel_ring_begin(rq, cmd);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(true);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	if (aux_inv) { /* hsdes: 1809175790 */
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_VD0_AUX_NV);
		else
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_VE0_AUX_NV);
	}

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);

	intel_ring_advance(rq, cs);

	return 0;
}
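
/* GGTT address of the preemption semaphore in the engine's status page. */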
static u32 preempt_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}
static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}
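
/*
 * Emit the initial breadcrumb (seqno - 1) and an arbitration point ahead of
 * the user payload, marking the boundary at which the request counts as
 * started for preemption and hang handling (see the comment below).
 */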
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 *
	 * i915_request_started() is used during preemption processing
	 * to decide if the request is currently inside the user payload
	 * or spinning on a kernel semaphore (or earlier). For no-preemption
	 * requests, we do allow preemption on the semaphore before the user
	 * payload, but do not allow preemption once the request is started.
	 *
	 * i915_request_started() is similarly used during GPU hangs to
	 * determine if the user's payload was guilty, and if so, the
	 * request is banned. Before the request is started, it is assumed
	 * to be unharmed and an innocent victim of another's hang.
	 */
	*cs++ = MI_NOOP;
	*cs++ = MI_ARB_CHECK;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}
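
/*
 * XeHP batch-buffer start: reload RING_PREDICATE_RESULT from the context's
 * indirect workaround page around the user batch and chain through a small
 * fixup batch (see the inline comment) so a stray MI_SET_PREDICATE left
 * behind cannot stall subsequent ring execution.
 */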
static int __xehp_emit_bb_start(struct i915_request *rq,
				u64 offset, u32 len,
				const unsigned int flags,
				u32 arb)
{
	struct intel_context *ce = rq->context;
	u32 wa_offset = lrc_indirect_bb(ce);
	u32 *cs;

	GEM_BUG_ON(!ce->wa_bb_page);

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | arb;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
	*cs++ = 0;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);

	return 0;
}
int xehp_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}
int xehp_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}
int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gen that do not need the w/a at all!), if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) that arbitration was enabled
	 * we would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}
int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}
static void assert_request_valid(struct i915_request *rq)
{
	struct intel_ring *ring __maybe_unused = rq->ring;

	/* Can we unwind this request without appearing to go forwards? */
	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}
/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Ensure there's always at least one preemption point per-request. */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;
	rq->wa_tail = intel_ring_offset(rq, cs);

	/* Check that entire request is less than half the ring */
	assert_request_valid(rq);

	return cs;
}
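
/*
 * Busy-wait on the preemption semaphore in the status page; only emitted for
 * execlists submission (the GuC manages preemption itself).
 */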
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	return cs;
}
static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = emit_preempt_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_TILE_CACHE_FLUSH |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */
static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}
/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET	0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
	return i915_ggtt_offset(rq->context->state) +
		(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}
/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
	int i;

	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
		MI_ATOMIC_MOVE;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;
	*cs++ = 1;

	/*
	 * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP)
	 * to align. 4 DWs above + 8 filler DWs here.
	 */
	for (i = 0; i < 8; ++i)
		*cs++ = 0;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;

	return cs;
}
static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = gen12_emit_preempt_busywait(rq, cs);

	/* Wa_14014475959:dg2 */
	if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
		cs = ccs_emit_wa_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	u32 flags = (PIPE_CONTROL_CS_STALL |
		     PIPE_CONTROL_TLB_INVALIDATE |
		     PIPE_CONTROL_TILE_CACHE_FLUSH |
		     PIPE_CONTROL_FLUSH_L3 |
		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		     PIPE_CONTROL_DC_FLUSH_ENABLE |
		     PIPE_CONTROL_FLUSH_ENABLE);

	if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
	    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
		/* dummy PIPE_CONTROL + depth flush */
		cs = gen12_emit_pipe_control(cs, 0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		flags |= PIPE_CONTROL_DEPTH_STALL;

	if (!HAS_3D_PIPELINE(rq->engine->i915))
		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
	else if (rq->engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

	cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);

	/* XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen12_emit_ggtt_write_rcs(cs,
				       rq->fence.seqno,
				       hwsp_offset(rq),
				       0,
				       PIPE_CONTROL_FLUSH_ENABLE |
				       PIPE_CONTROL_CS_STALL);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}