/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
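
/*
 * VT-d may overfetch before/after a scanout vma, so scanout surfaces are
 * padded with a guard area of scratch pages on either side; see the use in
 * i915_gem_object_pin_to_display_plane() below.
 */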
#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}
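
/*
 * Report whether writes through the CPU to this object need to be followed
 * by a clflush to remain coherent: either the object is not coherent for
 * CPU writes, or it is currently being scanned out and must be kept flushed
 * for the display engine.
 */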
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}
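
/*
 * Flush any writes pending in @obj's current write domain before the domain
 * is changed: GGTT writes are flushed through the owning GT, CPU writes are
 * clflushed, WC writes are fenced with a write barrier, and GPU render
 * writes mark the object's cachelines dirty when they need a clflush.
 * Clears obj->write_domain on return.
 */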
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}
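
/*
 * Report the current caching mode of an object back to userspace as one of
 * I915_CACHING_{CACHED,DISPLAY,NONE}. Not supported on discrete hardware, nor
 * for objects whose pat_index was set directly by userspace, since their
 * cache setting is not managed by the kernel.
 */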
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}
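
/*
 * Change the caching mode of an object as requested by userspace. This is
 * rejected on discrete hardware and on graphics IP version 12.70 and newer,
 * for objects whose pat_index was set directly by userspace, and for proxy
 * objects (other than allowing I915_CACHING_CACHED on userptr).
 */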
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED) {
			ret = -ENXIO;
			goto out;
		}
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
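
/*
 * As i915_gem_object_prepare_read(), but waits for all GPU access and
 * prepares the object for a CPU write: needs_clflush may contain
 * CLFLUSH_BEFORE and/or CLFLUSH_AFTER, the frontbuffer is invalidated and
 * the object is marked dirty. Returns with the pages pinned.
 */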
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;

		goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}