/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
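
/*
 * VT-d may overfetch before/after a scanout vma, so scanout surfaces are
 * padded with a guard area of scratch pages on either side; see the use in
 * i915_gem_object_pin_to_display_plane() below.
 */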
#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}
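
/*
 * Report whether writes through the CPU to this object need to be followed
 * by a clflush to remain coherent: either the object is not coherent for
 * CPU writes, or it is currently being scanned out and must be kept flushed
 * for the display engine.
 */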
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}
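
/*
 * Flush any writes pending in @obj's current write domain before the domain
 * is changed: GGTT writes are flushed through the owning GT, CPU writes are
 * clflushed, WC writes are fenced with a write barrier, and GPU render
 * writes mark the object's cachelines dirty when they need a clflush.
 * Clears obj->write_domain on return.
 */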
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}
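
/*
 * Report the current caching mode of an object back to userspace as one of
 * I915_CACHING_{CACHED,DISPLAY,NONE}. Not supported on discrete hardware, nor
 * for objects whose pat_index was set directly by userspace, since their
 * cache setting is not managed by the kernel.
 */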
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}
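
/*
 * Change the caching mode of an object as requested by userspace. This is
 * rejected on discrete hardware and on graphics IP version 12.70 and newer,
 * for objects whose pat_index was set directly by userspace, and for proxy
 * objects (other than allowing I915_CACHING_CACHED on userptr).
 */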
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED) {
			ret = -ENXIO;
			goto out;
		}
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
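
/*
 * As i915_gem_object_prepare_read(), but waits for all GPU access and
 * prepares the object for a CPU write: needs_clflush may contain
 * CLFLUSH_BEFORE and/or CLFLUSH_AFTER, the frontbuffer is invalidated and
 * the object is marked dirty. Returns with the pages pinned.
 */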
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}