#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#include "i915_selftest.h"

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill,
 * but it keeps the logic simple. Indeed, the whole purpose of this macro is
 * just to give some indication of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64

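/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * one way to compute the usable space in a ring while permanently keeping
 * I915_RING_FREE_SPACE bytes in reserve, so the tail can never run right up
 * to the head and trip the cacheline restriction quoted above.
 */
static inline int intel_ring_space_sketch(int head, int tail, int size)
{
	int space = head - tail;

	if (space <= 0)
		space += size;

	return space - I915_RING_FREE_SPACE;
}
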
struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define gen8_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
	(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))

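/*
 * Worked example (follows directly from GEN8_SEMAPHORE_OFFSET above,
 * assuming I915_NUM_ENGINES == 5): for a signaller with engine id 2 and a
 * waiter with engine id 3, the semaphore slot lives at
 * (2 * 5 + 3) * 8 = 104 = 0x68 bytes from dev_priv->semaphore->node.start.
 */
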
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 3

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

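/*
 * Usage sketch (simplified illustration only; the driver uses this pattern
 * when capturing and dumping instdone state): walk every slice/subslice
 * pair present on the device and print the captured sampler INSTDONE
 * values.
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		DRM_DEBUG_DRIVER("sampler[%d][%d]: 0x%08x\n",
 *				 slice, subslice,
 *				 instdone->sampler[slice][subslice]);
 */
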
struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	bool stalled;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct intel_engine_cs *engine;

	struct list_head request_list;

	u32 head;
	u32 tail;

	int space;
	int size;
	int effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position; also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct drm_i915_gem_request;
struct intel_render_state;

/*
 * Engine IDs definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
#define _VCS(n) (VCS + (n))
	VECS
};

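/* With the ordering above, _VCS(0) is VCS and _VCS(1) is VCS2. */
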
struct intel_engine_cs {
	struct drm_i915_private *i915;
	const char *name;
	enum intel_engine_id id;
	unsigned int exec_id;
	unsigned int hw_id;
	unsigned int guc_id;
	u32 mmio_base;
	unsigned int irq_shift;
	struct intel_ring *buffer;
	struct intel_timeline *timeline;

	struct intel_render_state *render_state;

	atomic_t irq_count;
	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
#define ENGINE_IRQ_EXECLIST 1

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct rb_root signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */
		struct drm_i915_gem_request __rcu *first_signal;
		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;

		bool irq_armed : 1;
		bool irq_enabled : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);
	void (*reset_hw)(struct intel_engine_cs *engine,
			 struct drm_i915_gem_request *req);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	int (*context_pin)(struct intel_engine_cs *engine,
			   struct i915_gem_context *ctx);
	void (*context_unpin)(struct intel_engine_cs *engine,
			      struct i915_gem_context *ctx);
	int (*request_alloc)(struct drm_i915_gem_request *req);
	int (*init_context)(struct drm_i915_gem_request *req);

	int (*emit_flush)(struct drm_i915_gem_request *request,
			  u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
	void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
				u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct drm_i915_gem_request *req);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct drm_i915_gem_request *request,
			 int priority);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		   VECS		   VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		   VECS		   VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
		union {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
			struct {
				/* our mbox written by others */
				u32 wait[GEN6_NUM_SEMAPHORES];
				/* mboxes this ring signals to */
				i915_reg_t signal[GEN6_NUM_SEMAPHORES];
			} mbox;
			u64 signal_ggtt[I915_NUM_ENGINES];
		};

		/* AKA wait() */
		int (*sync_to)(struct drm_i915_gem_request *req,
			       struct drm_i915_gem_request *signal);
		u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
	} semaphore;

	/* Execlists */
	struct tasklet_struct irq_tasklet;
	struct execlist_port {
		struct drm_i915_gem_request *request;
		unsigned int count;
		GEM_DEBUG_DECL(u32 context_id);
	} execlist_port[2];
	struct rb_root execlist_queue;
	struct rb_node *execlist_first;
	unsigned int fw_domains;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request, ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct i915_gem_context *last_retired_context;

	/* We track the current MI_SET_CONTEXT in order to eliminate
	 * redundant context switches. This presumes that requests are not
	 * reordered! Or, when they are, the tracking is updated along with
	 * the emission of individual requests into the legacy command
	 * stream (ring).
	 */
	struct i915_gem_context *legacy_active_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

	bool needs_cmd_parser;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
};

static inline unsigned
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return 1 << engine->id;
}

static inline void
intel_flush_status_page(struct intel_engine_cs *engine, int reg)
{
	mb();
	clflush(&engine->status_page.page_addr[reg]);
	mb();
}

static inline u32
intel_read_status_page(struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine,
			int reg, u32 value)
{
	mb();
	clflush(&engine->status_page.page_addr[reg]);
	engine->status_page.page_addr[reg] = value;
	clflush(&engine->status_page.page_addr[reg]);
	mb();
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

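/*
 * Worked example (follows from the defines above; assumes
 * MI_STORE_DWORD_INDEX_SHIFT == 2, i.e. a dword index becomes a byte
 * offset): I915_GEM_HWS_INDEX_ADDR is 0x30 << 2 = 0xc0, so the breadcrumb
 * seqno lives 0xc0 bytes into the status page, and the scratch slot starts
 * at byte 0x100.
 */
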
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);

u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n);

static inline void
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
}

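/*
 * Usage sketch (hypothetical function, for illustration only; MI_NOOP and
 * IS_ERR()/PTR_ERR() are assumed to be in scope, as they are elsewhere in
 * the driver): reserve space with intel_ring_begin(), emit exactly that
 * many dwords, then close the packet with intel_ring_advance().
 */
static inline int intel_ring_emit_noops_sketch(struct drm_i915_gem_request *req)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
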
static inline u32
intel_ring_offset(struct drm_i915_gem_request *req, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - req->ring->vaddr;
	GEM_BUG_ON(offset > req->ring->size);
	return offset & (req->ring->size - 1);
}

void intel_ring_update_space(struct intel_ring *ring);

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline->seqno);
}

int init_workarounds_ring(struct intel_engine_cs *engine);
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

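/*
 * Worked out: 6 + 6 + 36 dwords is 48 dwords, i.e. the 192 bytes measured
 * for BDW; doubling the largest single packet to cover a possible wrap
 * gives 6 + 6 + 72 = 84 dwords, i.e. the 336 bytes reserved above.
 */
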
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
				   struct drm_i915_gem_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct drm_i915_gem_request *rq)
{
	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct drm_i915_gem_request *rq)
{
	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);

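/*
 * Usage sketch (simplified illustration only; the real wait loop lives in
 * i915_wait_request() and additionally handles timeouts, signals, seqno
 * wraparound and the bottom-half hand-over described above):
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init(&wait, rq);
 *	intel_engine_add_wait(engine, &wait);
 *	... sleep until the request completes, rechecking
 *	    intel_wait_check_request(&wait, rq) after each wakeup ...
 *	intel_engine_remove_wait(engine, &wait);
 */
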
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

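/*
 * Usage sketch (illustration only; PIPE_CONTROL_CS_STALL comes from
 * i915_reg.h, like GFX_OP_PIPE_CONTROL above): the helper zeroes a
 * six-dword packet, fills in the opcode, flags and address, and returns a
 * pointer just past it, so emissions can be chained:
 *
 *	batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_CS_STALL, 0);
 *	batch = gen8_emit_pipe_control(batch, flags, scratch_addr);
 */
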
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);

#endif /* _INTEL_RINGBUFFER_H_ */