Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm...
author Dave Airlie <[email protected]>
Tue, 8 Sep 2020 21:53:59 +0000 (07:53 +1000)
committer Dave Airlie <[email protected]>
Tue, 8 Sep 2020 21:55:22 +0000 (07:55 +1000)
(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)

UAPI Changes:
(- Potential implicit changes from WW locking refactoring)

Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)

Driver Changes:

- MAJOR: Apply WW locking across the driver (Maarten); see the retry-loop sketch after this list

- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)

- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)

- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)
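
A minimal sketch of the retry loop the new WW helpers implement (not part of
the tag text above; every call used here appears in the i915_gem_execbuffer.c
diff further down, but the standalone function and the obj/vma names are
illustrative only):

static int pin_vma_ww(struct drm_i915_gem_object *obj, struct i915_vma *vma)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible waits */
retry:
	/* Take the object's dma_resv lock, tracked by the ww context */
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err == -EDEADLK) {
		/* Drop all locks held on this context, sleep on the
		 * contended lock, then retry the whole sequence */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	/* Unlocks any objects still tracked by the context; a successful
	 * pin outlives the lock */
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

eb_relocate_parse() and eb_relocate_parse_slow() in the diff below follow the
same shape: on -EDEADLK they call eb_release_vmas(), i915_gem_ww_ctx_backoff()
and jump back to their retry/repeat_validate labels.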

Signed-off-by: Dave Airlie <[email protected]>
From: Joonas Lahtinen <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_ggtt.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
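
Note: most hunks in the intel_display.c diff below simply replace the old
"/* fall through */" comments with the kernel's fallthrough pseudo-keyword
(which expands to __attribute__((__fallthrough__)) where supported, so
-Wimplicit-fallthrough can tell intentional fall-through from a missing break)
and pass the new ww-context argument (NULL here) to i915_gem_object_lock() and
i915_ggtt_pin(). A minimal illustration of the annotation, with hypothetical
names, assuming <linux/types.h> and <linux/compiler_attributes.h>:

enum demo_modifier { DEMO_MOD_Y_TILED, DEMO_MOD_Y_TILED_CCS };

static int demo_tile_width(enum demo_modifier modifier, bool is_ccs_plane)
{
	switch (modifier) {
	case DEMO_MOD_Y_TILED_CCS:
		if (is_ccs_plane)
			return 64;
		fallthrough;	/* a non-CCS plane uses the plain Y-tile width */
	case DEMO_MOD_Y_TILED:
		return 128;
	}
	return 0;
}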

index d60efb8f4ba38553836a9671ec05575f618f56fc,c8b1dd1a9e467a876978eb7d21e59751280ad002..dc622af8695cd9522da5cf6dfc8b8b0038b75499
@@@ -2030,12 -2030,12 +2030,12 @@@ intel_tile_width_bytes(const struct drm
        case I915_FORMAT_MOD_Y_TILED_CCS:
                if (is_ccs_plane(fb, color_plane))
                        return 128;
 -              /* fall through */
 +              fallthrough;
        case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
        case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
                if (is_ccs_plane(fb, color_plane))
                        return 64;
 -              /* fall through */
 +              fallthrough;
        case I915_FORMAT_MOD_Y_TILED:
                if (IS_GEN(dev_priv, 2) || HAS_128_BYTE_Y_TILING(dev_priv))
                        return 128;
        case I915_FORMAT_MOD_Yf_TILED_CCS:
                if (is_ccs_plane(fb, color_plane))
                        return 128;
 -              /* fall through */
 +              fallthrough;
        case I915_FORMAT_MOD_Yf_TILED:
                switch (cpp) {
                case 1:
@@@ -2186,7 -2186,7 +2186,7 @@@ static unsigned int intel_surf_alignmen
        case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
                if (is_semiplanar_uv_plane(fb, color_plane))
                        return intel_tile_row_size(fb, color_plane);
 -              /* Fall-through */
 +              fallthrough;
        case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
                return 16 * 1024;
        case I915_FORMAT_MOD_Y_TILED_CCS:
                if (INTEL_GEN(dev_priv) >= 12 &&
                    is_semiplanar_uv_plane(fb, color_plane))
                        return intel_tile_row_size(fb, color_plane);
 -              /* Fall-through */
 +              fallthrough;
        case I915_FORMAT_MOD_Yf_TILED:
                return 1 * 1024 * 1024;
        default:
@@@ -2311,7 -2311,7 +2311,7 @@@ err
  
  void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
  {
-       i915_gem_object_lock(vma->obj);
+       i915_gem_object_lock(vma->obj, NULL);
        if (flags & PLANE_HAS_FENCE)
                i915_vma_unpin_fence(vma);
        i915_gem_object_unpin_from_display_plane(vma);
@@@ -3451,7 -3451,7 +3451,7 @@@ initial_plane_vma(struct drm_i915_priva
        if (IS_ERR(vma))
                goto err_obj;
  
-       if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+       if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
                goto err_obj;
  
        if (i915_gem_object_is_tiled(obj) &&
@@@ -6252,7 -6252,7 +6252,7 @@@ static int skl_update_scaler_plane(stru
        case DRM_FORMAT_ARGB16161616F:
                if (INTEL_GEN(dev_priv) >= 11)
                        break;
 -              /* fall through */
 +              fallthrough;
        default:
                drm_dbg_kms(&dev_priv->drm,
                            "[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n",
@@@ -10946,7 -10946,7 +10946,7 @@@ static void hsw_get_ddi_pll(struct drm_
                break;
        default:
                MISSING_CASE(ddi_pll_sel);
 -              /* fall through */
 +              fallthrough;
        case PORT_CLK_SEL_NONE:
                return;
        }
@@@ -11006,10 -11006,10 +11006,10 @@@ static bool hsw_get_transcoder_state(st
                        drm_WARN(dev, 1,
                                 "unknown pipe linked to transcoder %s\n",
                                 transcoder_name(panel_transcoder));
 -                      /* fall through */
 +                      fallthrough;
                case TRANS_DDI_EDP_INPUT_A_ONOFF:
                        force_thru = true;
 -                      /* fall through */
 +                      fallthrough;
                case TRANS_DDI_EDP_INPUT_A_ON:
                        trans_pipe = PIPE_A;
                        break;
@@@ -13236,7 -13236,7 +13236,7 @@@ static bool check_digital_port_conflict
                case INTEL_OUTPUT_DDI:
                        if (drm_WARN_ON(dev, !HAS_DDI(to_i915(dev))))
                                break;
 -                      /* else, fall through */
 +                      fallthrough;
                case INTEL_OUTPUT_DP:
                case INTEL_OUTPUT_HDMI:
                case INTEL_OUTPUT_EDP:
@@@ -17194,7 -17194,7 +17194,7 @@@ static int intel_framebuffer_init(struc
        if (!intel_fb->frontbuffer)
                return -ENOMEM;
  
-       i915_gem_object_lock(obj);
+       i915_gem_object_lock(obj, NULL);
        tiling = i915_gem_object_get_tiling(obj);
        stride = i915_gem_object_get_stride(obj);
        i915_gem_object_unlock(obj);
index 322642fb765fd65c7f25f01a19af89fc6b47df35,ae63748e90bddfd7c0af9c5a62ca5bb14a0dad00..804339255df1907eda648d2b38010391de409232
@@@ -41,11 -41,6 +41,6 @@@ struct eb_vma 
        u32 handle;
  };
  
- struct eb_vma_array {
-       struct kref kref;
-       struct eb_vma vma[];
- };
  enum {
        FORCE_CPU_RELOC = 1,
        FORCE_GTT_RELOC,
  #define __EXEC_OBJECT_NEEDS_MAP               BIT(29)
  #define __EXEC_OBJECT_NEEDS_BIAS      BIT(28)
  #define __EXEC_OBJECT_INTERNAL_FLAGS  (~0u << 28) /* all of the above */
+ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  
  #define __EXEC_HAS_RELOC      BIT(31)
- #define __EXEC_INTERNAL_FLAGS (~0u << 31)
+ #define __EXEC_ENGINE_PINNED  BIT(30)
+ #define __EXEC_INTERNAL_FLAGS (~0u << 30)
  #define UPDATE                        PIN_OFFSET_FIXED
  
  #define BATCH_OFFSET_BIAS (256*1024)
@@@ -261,6 -258,8 +258,8 @@@ struct i915_execbuffer 
        /** list of vma that have execobj.relocation_count */
        struct list_head relocs;
  
+       struct i915_gem_ww_ctx ww;
        /**
         * Track the most recently used object for relocations, as we
         * frequently have to perform multiple relocations within the same
                bool has_fence : 1;
                bool needs_unfenced : 1;
  
-               struct i915_vma *target;
                struct i915_request *rq;
-               struct i915_vma *rq_vma;
                u32 *rq_cmd;
                unsigned int rq_size;
+               struct intel_gt_buffer_pool_node *pool;
        } reloc_cache;
  
+       struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+       struct intel_context *reloc_context;
        u64 invalid_flags; /** Set of execobj.flags that are invalid */
        u32 context_flags; /** Set of execobj.flags to insert from the ctx */
  
        u32 batch_start_offset; /** Location within object of batch */
        u32 batch_len; /** Length of batch within object */
        u32 batch_flags; /** Flags composed for emit_bb_start() */
+       struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
  
        /**
         * Indicate either the size of the hastable used to resolve
         */
        int lut_size;
        struct hlist_head *buckets; /** ht for relocation handles */
-       struct eb_vma_array *array;
  
        struct eb_fence *fences;
        unsigned long num_fences;
  };
  
+ static int eb_parse(struct i915_execbuffer *eb);
+ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+                                         bool throttle);
+ static void eb_unpin_engine(struct i915_execbuffer *eb);
  static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
  {
        return intel_engine_requires_cmd_parser(eb->engine) ||
                 eb->args->batch_len);
  }
  
- static struct eb_vma_array *eb_vma_array_create(unsigned int count)
- {
-       struct eb_vma_array *arr;
-       arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
-       if (!arr)
-               return NULL;
-       kref_init(&arr->kref);
-       arr->vma[0].vma = NULL;
-       return arr;
- }
- static inline void eb_unreserve_vma(struct eb_vma *ev)
- {
-       struct i915_vma *vma = ev->vma;
-       if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
-               __i915_vma_unpin_fence(vma);
-       if (ev->flags & __EXEC_OBJECT_HAS_PIN)
-               __i915_vma_unpin(vma);
-       ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
-                      __EXEC_OBJECT_HAS_FENCE);
- }
- static void eb_vma_array_destroy(struct kref *kref)
- {
-       struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
-       struct eb_vma *ev = arr->vma;
-       while (ev->vma) {
-               eb_unreserve_vma(ev);
-               i915_vma_put(ev->vma);
-               ev++;
-       }
-       kvfree(arr);
- }
- static void eb_vma_array_put(struct eb_vma_array *arr)
- {
-       kref_put(&arr->kref, eb_vma_array_destroy);
- }
  static int eb_create(struct i915_execbuffer *eb)
  {
-       /* Allocate an extra slot for use by the command parser + sentinel */
-       eb->array = eb_vma_array_create(eb->buffer_count + 2);
-       if (!eb->array)
-               return -ENOMEM;
-       eb->vma = eb->array->vma;
        if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
                unsigned int size = 1 + ilog2(eb->buffer_count);
  
                                break;
                } while (--size);
  
-               if (unlikely(!size)) {
-                       eb_vma_array_put(eb->array);
+               if (unlikely(!size))
                        return -ENOMEM;
-               }
  
                eb->lut_size = size;
        } else {
@@@ -486,16 -436,17 +436,17 @@@ eb_pin_vma(struct i915_execbuffer *eb
                pin_flags |= PIN_GLOBAL;
  
        /* Attempt to reuse the current location if available */
-       if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+       /* TODO: Add -EDEADLK handling here */
+       if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) {
                if (entry->flags & EXEC_OBJECT_PINNED)
                        return false;
  
                /* Failing that pick any _free_ space if suitable */
-               if (unlikely(i915_vma_pin(vma,
-                                         entry->pad_to_size,
-                                         entry->alignment,
-                                         eb_pin_flags(entry, ev->flags) |
-                                         PIN_USER | PIN_NOEVICT)))
+               if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
+                                            entry->pad_to_size,
+                                            entry->alignment,
+                                            eb_pin_flags(entry, ev->flags) |
+                                            PIN_USER | PIN_NOEVICT)))
                        return false;
        }
  
        return !eb_vma_misplaced(entry, vma, ev->flags);
  }
  
+ static inline void
+ eb_unreserve_vma(struct eb_vma *ev)
+ {
+       if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
+               return;
+       if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+               __i915_vma_unpin_fence(ev->vma);
+       __i915_vma_unpin(ev->vma);
+       ev->flags &= ~__EXEC_OBJECT_RESERVED;
+ }
  static int
  eb_validate_vma(struct i915_execbuffer *eb,
                struct drm_i915_gem_exec_object2 *entry,
@@@ -604,16 -568,6 +568,6 @@@ eb_add_vma(struct i915_execbuffer *eb
  
                eb->batch = ev;
        }
-       if (eb_pin_vma(eb, entry, ev)) {
-               if (entry->offset != vma->node.start) {
-                       entry->offset = vma->node.start | UPDATE;
-                       eb->args->flags |= __EXEC_HAS_RELOC;
-               }
-       } else {
-               eb_unreserve_vma(ev);
-               list_add_tail(&ev->bind_link, &eb->unbound);
-       }
  }
  
  static inline int use_cpu_reloc(const struct reloc_cache *cache,
                obj->cache_level != I915_CACHE_NONE);
  }
  
- static int eb_reserve_vma(const struct i915_execbuffer *eb,
+ static int eb_reserve_vma(struct i915_execbuffer *eb,
                          struct eb_vma *ev,
                          u64 pin_flags)
  {
                        return err;
        }
  
-       err = i915_vma_pin(vma,
+       err = i915_vma_pin_ww(vma, &eb->ww,
                           entry->pad_to_size, entry->alignment,
                           eb_pin_flags(entry, ev->flags) | pin_flags);
        if (err)
@@@ -698,10 -652,6 +652,6 @@@ static int eb_reserve(struct i915_execb
         * This avoid unnecessary unbinding of later objects in order to make
         * room for the earlier objects *unless* we need to defragment.
         */
-       if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
-               return -EINTR;
        pass = 0;
        do {
                list_for_each_entry(ev, &eb->unbound, bind_link) {
                        if (err)
                                break;
                }
-               if (!(err == -ENOSPC || err == -EAGAIN))
-                       break;
+               if (err != -ENOSPC)
+                       return err;
  
                /* Resort *all* the objects into priority order */
                INIT_LIST_HEAD(&eb->unbound);
                }
                list_splice_tail(&last, &eb->unbound);
  
-               if (err == -EAGAIN) {
-                       mutex_unlock(&eb->i915->drm.struct_mutex);
-                       flush_workqueue(eb->i915->mm.userptr_wq);
-                       mutex_lock(&eb->i915->drm.struct_mutex);
-                       continue;
-               }
                switch (pass++) {
                case 0:
                        break;
                        err = i915_gem_evict_vm(eb->context->vm);
                        mutex_unlock(&eb->context->vm->mutex);
                        if (err)
-                               goto unlock;
+                               return err;
                        break;
  
                default:
-                       err = -ENOSPC;
-                       goto unlock;
+                       return -ENOSPC;
                }
  
                pin_flags = PIN_USER;
        } while (1);
- unlock:
-       mutex_unlock(&eb->i915->drm.struct_mutex);
-       return err;
  }
  
  static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
@@@ -893,12 -831,12 +831,12 @@@ static struct i915_vma *eb_lookup_vma(s
  
  static int eb_lookup_vmas(struct i915_execbuffer *eb)
  {
+       struct drm_i915_private *i915 = eb->i915;
        unsigned int batch = eb_batch_index(eb);
        unsigned int i;
        int err = 0;
  
        INIT_LIST_HEAD(&eb->relocs);
-       INIT_LIST_HEAD(&eb->unbound);
  
        for (i = 0; i < eb->buffer_count; i++) {
                struct i915_vma *vma;
                vma = eb_lookup_vma(eb, eb->exec[i].handle);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
-                       break;
+                       goto err;
                }
  
                err = eb_validate_vma(eb, &eb->exec[i], vma);
                if (unlikely(err)) {
                        i915_vma_put(vma);
-                       break;
+                       goto err;
                }
  
                eb_add_vma(eb, i, batch, vma);
        }
  
+       if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
+               drm_dbg(&i915->drm,
+                       "Attempting to use self-modifying batch buffer\n");
+               return -EINVAL;
+       }
+       if (range_overflows_t(u64,
+                             eb->batch_start_offset, eb->batch_len,
+                             eb->batch->vma->size)) {
+               drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+               return -EINVAL;
+       }
+       if (eb->batch_len == 0)
+               eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+       return 0;
+ err:
        eb->vma[i].vma = NULL;
        return err;
  }
  
+ static int eb_validate_vmas(struct i915_execbuffer *eb)
+ {
+       unsigned int i;
+       int err;
+       INIT_LIST_HEAD(&eb->unbound);
+       for (i = 0; i < eb->buffer_count; i++) {
+               struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+               struct eb_vma *ev = &eb->vma[i];
+               struct i915_vma *vma = ev->vma;
+               err = i915_gem_object_lock(vma->obj, &eb->ww);
+               if (err)
+                       return err;
+               if (eb_pin_vma(eb, entry, ev)) {
+                       if (entry->offset != vma->node.start) {
+                               entry->offset = vma->node.start | UPDATE;
+                               eb->args->flags |= __EXEC_HAS_RELOC;
+                       }
+               } else {
+                       eb_unreserve_vma(ev);
+                       list_add_tail(&ev->bind_link, &eb->unbound);
+                       if (drm_mm_node_allocated(&vma->node)) {
+                               err = i915_vma_unbind(vma);
+                               if (err)
+                                       return err;
+                       }
+               }
+               GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+                          eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+       }
+       if (!list_empty(&eb->unbound))
+               return eb_reserve(eb);
+       return 0;
+ }
  static struct eb_vma *
  eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
  {
        }
  }
  
+ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
+ {
+       const unsigned int count = eb->buffer_count;
+       unsigned int i;
+       for (i = 0; i < count; i++) {
+               struct eb_vma *ev = &eb->vma[i];
+               struct i915_vma *vma = ev->vma;
+               if (!vma)
+                       break;
+               eb_unreserve_vma(ev);
+               if (final)
+                       i915_vma_put(vma);
+       }
+       eb_unpin_engine(eb);
+ }
  static void eb_destroy(const struct i915_execbuffer *eb)
  {
        GEM_BUG_ON(eb->reloc_cache.rq);
  
-       if (eb->array)
-               eb_vma_array_put(eb->array);
        if (eb->lut_size > 0)
                kfree(eb->buckets);
  }
@@@ -960,6 -977,14 +977,14 @@@ relocation_target(const struct drm_i915
        return gen8_canonical_addr((int)reloc->delta + target->node.start);
  }
  
+ static void reloc_cache_clear(struct reloc_cache *cache)
+ {
+       cache->rq = NULL;
+       cache->rq_cmd = NULL;
+       cache->pool = NULL;
+       cache->rq_size = 0;
+ }
  static void reloc_cache_init(struct reloc_cache *cache,
                             struct drm_i915_private *i915)
  {
        cache->has_fence = cache->gen < 4;
        cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
        cache->node.flags = 0;
-       cache->rq = NULL;
-       cache->target = NULL;
+       reloc_cache_clear(cache);
  }
  
  static inline void *unmask_page(unsigned long p)
@@@ -995,132 -1019,60 +1019,60 @@@ static inline struct i915_ggtt *cache_t
        return &i915->ggtt;
  }
  
- #define RELOC_TAIL 4
- static int reloc_gpu_chain(struct reloc_cache *cache)
+ static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
  {
-       struct intel_gt_buffer_pool_node *pool;
-       struct i915_request *rq = cache->rq;
-       struct i915_vma *batch;
-       u32 *cmd;
-       int err;
-       pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
-       if (IS_ERR(pool))
-               return PTR_ERR(pool);
-       batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
-       if (IS_ERR(batch)) {
-               err = PTR_ERR(batch);
-               goto out_pool;
-       }
-       err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
-       if (err)
-               goto out_pool;
-       GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE  / sizeof(u32));
-       cmd = cache->rq_cmd + cache->rq_size;
-       *cmd++ = MI_ARB_CHECK;
-       if (cache->gen >= 8)
-               *cmd++ = MI_BATCH_BUFFER_START_GEN8;
-       else if (cache->gen >= 6)
-               *cmd++ = MI_BATCH_BUFFER_START;
-       else
-               *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-       *cmd++ = lower_32_bits(batch->node.start);
-       *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
-       i915_gem_object_flush_map(cache->rq_vma->obj);
-       i915_gem_object_unpin_map(cache->rq_vma->obj);
-       cache->rq_vma = NULL;
-       err = intel_gt_buffer_pool_mark_active(pool, rq);
-       if (err == 0) {
-               i915_vma_lock(batch);
-               err = i915_request_await_object(rq, batch->obj, false);
-               if (err == 0)
-                       err = i915_vma_move_to_active(batch, rq, 0);
-               i915_vma_unlock(batch);
-       }
-       i915_vma_unpin(batch);
-       if (err)
-               goto out_pool;
-       cmd = i915_gem_object_pin_map(batch->obj,
-                                     cache->has_llc ?
-                                     I915_MAP_FORCE_WB :
-                                     I915_MAP_FORCE_WC);
-       if (IS_ERR(cmd)) {
-               err = PTR_ERR(cmd);
-               goto out_pool;
-       }
-       /* Return with batch mapping (cmd) still pinned */
-       cache->rq_cmd = cmd;
-       cache->rq_size = 0;
-       cache->rq_vma = batch;
- out_pool:
-       intel_gt_buffer_pool_put(pool);
-       return err;
- }
+       if (!cache->pool)
+               return;
  
- static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
- {
-       return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+       /*
+        * This is a bit nasty, normally we keep objects locked until the end
+        * of execbuffer, but we already submit this, and have to unlock before
+        * dropping the reference. Fortunately we can only hold 1 pool node at
+        * a time, so this should be harmless.
+        */
+       i915_gem_ww_unlock_single(cache->pool->obj);
+       intel_gt_buffer_pool_put(cache->pool);
+       cache->pool = NULL;
  }
  
- static int reloc_gpu_flush(struct reloc_cache *cache)
+ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
  {
-       struct i915_request *rq;
-       int err;
-       rq = fetch_and_zero(&cache->rq);
-       if (!rq)
-               return 0;
+       struct drm_i915_gem_object *obj = cache->rq->batch->obj;
  
-       if (cache->rq_vma) {
-               struct drm_i915_gem_object *obj = cache->rq_vma->obj;
+       GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+       cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
  
-               GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
-               cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
+       __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+       i915_gem_object_unpin_map(obj);
  
-               __i915_gem_object_flush_map(obj,
-                                           0, sizeof(u32) * cache->rq_size);
-               i915_gem_object_unpin_map(obj);
-       }
-       err = 0;
-       if (rq->engine->emit_init_breadcrumb)
-               err = rq->engine->emit_init_breadcrumb(rq);
-       if (!err)
-               err = rq->engine->emit_bb_start(rq,
-                                               rq->batch->node.start,
-                                               PAGE_SIZE,
-                                               reloc_bb_flags(cache));
-       if (err)
-               i915_request_set_error_once(rq, err);
+       intel_gt_chipset_flush(cache->rq->engine->gt);
  
-       intel_gt_chipset_flush(rq->engine->gt);
-       i915_request_add(rq);
+       i915_request_add(cache->rq);
+       reloc_cache_put_pool(eb, cache);
+       reloc_cache_clear(cache);
  
-       return err;
+       eb->reloc_pool = NULL;
  }
  
- static void reloc_cache_reset(struct reloc_cache *cache)
+ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
  {
        void *vaddr;
  
+       if (cache->rq)
+               reloc_gpu_flush(eb, cache);
        if (!cache->vaddr)
                return;
  
        vaddr = unmask_page(cache->vaddr);
        if (cache->vaddr & KMAP) {
+               struct drm_i915_gem_object *obj =
+                       (struct drm_i915_gem_object *)cache->node.mm;
                if (cache->vaddr & CLFLUSH_AFTER)
                        mb();
  
                kunmap_atomic(vaddr);
-               i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+               i915_gem_object_finish_access(obj);
        } else {
                struct i915_ggtt *ggtt = cache_to_ggtt(cache);
  
  
  static void *reloc_kmap(struct drm_i915_gem_object *obj,
                        struct reloc_cache *cache,
-                       unsigned long page)
+                       unsigned long pageno)
  {
        void *vaddr;
+       struct page *page;
  
        if (cache->vaddr) {
                kunmap_atomic(unmask_page(cache->vaddr));
                        mb();
        }
  
-       vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+       page = i915_gem_object_get_page(obj, pageno);
+       if (!obj->mm.dirty)
+               set_page_dirty(page);
+       vaddr = kmap_atomic(page);
        cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
-       cache->page = page;
+       cache->page = pageno;
  
        return vaddr;
  }
  
  static void *reloc_iomap(struct drm_i915_gem_object *obj,
-                        struct reloc_cache *cache,
+                        struct i915_execbuffer *eb,
                         unsigned long page)
  {
+       struct reloc_cache *cache = &eb->reloc_cache;
        struct i915_ggtt *ggtt = cache_to_ggtt(cache);
        unsigned long offset;
        void *vaddr;
                if (use_cpu_reloc(cache, obj))
                        return NULL;
  
-               i915_gem_object_lock(obj);
                err = i915_gem_object_set_to_gtt_domain(obj, true);
-               i915_gem_object_unlock(obj);
                if (err)
                        return ERR_PTR(err);
  
-               vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-                                              PIN_MAPPABLE |
-                                              PIN_NONBLOCK /* NOWARN */ |
-                                              PIN_NOEVICT);
+               vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+                                                 PIN_MAPPABLE |
+                                                 PIN_NONBLOCK /* NOWARN */ |
+                                                 PIN_NOEVICT);
+               if (vma == ERR_PTR(-EDEADLK))
+                       return vma;
                if (IS_ERR(vma)) {
                        memset(&cache->node, 0, sizeof(cache->node));
                        mutex_lock(&ggtt->vm.mutex);
  }
  
  static void *reloc_vaddr(struct drm_i915_gem_object *obj,
-                        struct reloc_cache *cache,
+                        struct i915_execbuffer *eb,
                         unsigned long page)
  {
+       struct reloc_cache *cache = &eb->reloc_cache;
        void *vaddr;
  
        if (cache->page == page) {
        } else {
                vaddr = NULL;
                if ((cache->vaddr & KMAP) == 0)
-                       vaddr = reloc_iomap(obj, cache, page);
+                       vaddr = reloc_iomap(obj, eb, page);
                if (!vaddr)
                        vaddr = reloc_kmap(obj, cache, page);
        }
@@@ -1287,7 -1247,7 +1247,7 @@@ static int reloc_move_to_gpu(struct i91
        struct drm_i915_gem_object *obj = vma->obj;
        int err;
  
-       i915_vma_lock(vma);
+       assert_vma_held(vma);
  
        if (obj->cache_dirty & ~obj->cache_coherent)
                i915_gem_clflush_object(obj, 0);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
  
-       i915_vma_unlock(vma);
        return err;
  }
  
  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                             struct intel_engine_cs *engine,
+                            struct i915_vma *vma,
                             unsigned int len)
  {
        struct reloc_cache *cache = &eb->reloc_cache;
-       struct intel_gt_buffer_pool_node *pool;
+       struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
        struct i915_request *rq;
        struct i915_vma *batch;
        u32 *cmd;
        int err;
  
-       pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
-       if (IS_ERR(pool))
-               return PTR_ERR(pool);
+       if (!pool) {
+               pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
+               if (IS_ERR(pool))
+                       return PTR_ERR(pool);
+       }
+       eb->reloc_pool = NULL;
+       err = i915_gem_object_lock(pool->obj, &eb->ww);
+       if (err)
+               goto err_pool;
  
        cmd = i915_gem_object_pin_map(pool->obj,
                                      cache->has_llc ?
                                      I915_MAP_FORCE_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
-               goto out_pool;
+               goto err_pool;
        }
  
-       batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
+       batch = i915_vma_instance(pool->obj, vma->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto err_unmap;
        }
  
-       err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+       err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
        if (err)
                goto err_unmap;
  
        if (engine == eb->context->engine) {
                rq = i915_request_create(eb->context);
        } else {
-               struct intel_context *ce;
+               struct intel_context *ce = eb->reloc_context;
  
-               ce = intel_context_create(engine);
-               if (IS_ERR(ce)) {
-                       err = PTR_ERR(ce);
-                       goto err_unpin;
+               if (!ce) {
+                       ce = intel_context_create(engine);
+                       if (IS_ERR(ce)) {
+                               err = PTR_ERR(ce);
+                               goto err_unpin;
+                       }
+                       i915_vm_put(ce->vm);
+                       ce->vm = i915_vm_get(eb->context->vm);
+                       eb->reloc_context = ce;
                }
  
-               i915_vm_put(ce->vm);
-               ce->vm = i915_vm_get(eb->context->vm);
+               err = intel_context_pin_ww(ce, &eb->ww);
+               if (err)
+                       goto err_unpin;
  
-               rq = intel_context_create_request(ce);
-               intel_context_put(ce);
+               rq = i915_request_create(ce);
+               intel_context_unpin(ce);
        }
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
        if (err)
                goto err_request;
  
-       i915_vma_lock(batch);
+       err = reloc_move_to_gpu(rq, vma);
+       if (err)
+               goto err_request;
+       err = eb->engine->emit_bb_start(rq,
+                                       batch->node.start, PAGE_SIZE,
+                                       cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+       if (err)
+               goto skip_request;
+       assert_vma_held(batch);
        err = i915_request_await_object(rq, batch->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(batch, rq, 0);
-       i915_vma_unlock(batch);
        if (err)
                goto skip_request;
  
        cache->rq = rq;
        cache->rq_cmd = cmd;
        cache->rq_size = 0;
-       cache->rq_vma = batch;
+       cache->pool = pool;
  
        /* Return with batch mapping (cmd) still pinned */
-       goto out_pool;
+       return 0;
  
  skip_request:
        i915_request_set_error_once(rq, err);
@@@ -1389,8 -1371,8 +1371,8 @@@ err_unpin
        i915_vma_unpin(batch);
  err_unmap:
        i915_gem_object_unpin_map(pool->obj);
out_pool:
-       intel_gt_buffer_pool_put(pool);
err_pool:
+       eb->reloc_pool = pool;
        return err;
  }
  
@@@ -1405,9 -1387,12 +1387,12 @@@ static u32 *reloc_gpu(struct i915_execb
  {
        struct reloc_cache *cache = &eb->reloc_cache;
        u32 *cmd;
-       int err;
+       if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+               reloc_gpu_flush(eb, cache);
  
        if (unlikely(!cache->rq)) {
+               int err;
                struct intel_engine_cs *engine = eb->engine;
  
                if (!reloc_can_use_engine(engine)) {
                                return ERR_PTR(-ENODEV);
                }
  
-               err = __reloc_gpu_alloc(eb, engine, len);
+               err = __reloc_gpu_alloc(eb, engine, vma, len);
                if (unlikely(err))
                        return ERR_PTR(err);
        }
  
-       if (vma != cache->target) {
-               err = reloc_move_to_gpu(cache->rq, vma);
-               if (unlikely(err)) {
-                       i915_request_set_error_once(cache->rq, err);
-                       return ERR_PTR(err);
-               }
-               cache->target = vma;
-       }
-       if (unlikely(cache->rq_size + len >
-                    PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
-               err = reloc_gpu_chain(cache);
-               if (unlikely(err)) {
-                       i915_request_set_error_once(cache->rq, err);
-                       return ERR_PTR(err);
-               }
-       }
-       GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE  / sizeof(u32));
        cmd = cache->rq_cmd + cache->rq_size;
        cache->rq_size += len;
  
@@@ -1472,7 -1437,7 +1437,7 @@@ static unsigned long vma_phys_addr(stru
        return addr + offset_in_page(offset);
  }
  
 -static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 +static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
                              struct i915_vma *vma,
                              u64 offset,
                              u64 target_addr)
                len = 3;
  
        batch = reloc_gpu(eb, vma, len);
-       if (IS_ERR(batch))
+       if (batch == ERR_PTR(-EDEADLK))
+               return (s64)-EDEADLK;
+       else if (IS_ERR(batch))
                return false;
  
        addr = gen8_canonical_addr(vma->node.start + offset);
        return true;
  }
  
- static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ static int reloc_entry_gpu(struct i915_execbuffer *eb,
                            struct i915_vma *vma,
                            u64 offset,
                            u64 target_addr)
@@@ -1565,14 -1532,17 +1532,17 @@@ relocate_entry(struct i915_vma *vma
  {
        u64 target_addr = relocation_target(reloc, target);
        u64 offset = reloc->offset;
+       int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
+       if (reloc_gpu < 0)
+               return reloc_gpu;
  
-       if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+       if (!reloc_gpu) {
                bool wide = eb->reloc_cache.use_64bit_reloc;
                void *vaddr;
  
  repeat:
-               vaddr = reloc_vaddr(vma->obj,
-                                   &eb->reloc_cache,
+               vaddr = reloc_vaddr(vma->obj, eb,
                                    offset >> PAGE_SHIFT);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
@@@ -1723,7 -1693,9 +1693,9 @@@ static int eb_relocate_vma(struct i915_
                 * we would try to acquire the struct mutex again. Obviously
                 * this is bad and so lockdep complains vehemently.
                 */
-               copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
+               pagefault_disable();
+               copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+               pagefault_enable();
                if (unlikely(copied)) {
                        remain = -EFAULT;
                        goto out;
                urelocs += ARRAY_SIZE(stack);
        } while (remain);
  out:
-       reloc_cache_reset(&eb->reloc_cache);
+       reloc_cache_reset(&eb->reloc_cache, eb);
        return remain;
  }
  
- static int eb_relocate(struct i915_execbuffer *eb)
+ static int
+ eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
  {
+       const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+       struct drm_i915_gem_relocation_entry *relocs =
+               u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+       unsigned int i;
        int err;
  
-       err = eb_lookup_vmas(eb);
-       if (err)
-               return err;
+       for (i = 0; i < entry->relocation_count; i++) {
+               u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
  
-       if (!list_empty(&eb->unbound)) {
-               err = eb_reserve(eb);
-               if (err)
-                       return err;
+               if ((s64)offset < 0) {
+                       err = (int)offset;
+                       goto err;
+               }
        }
+       err = 0;
+ err:
+       reloc_cache_reset(&eb->reloc_cache, eb);
+       return err;
+ }
  
-       /* The objects are in their final locations, apply the relocations. */
-       if (eb->args->flags & __EXEC_HAS_RELOC) {
-               struct eb_vma *ev;
-               int flush;
+ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+ {
+       const char __user *addr, *end;
+       unsigned long size;
+       char __maybe_unused c;
  
-               list_for_each_entry(ev, &eb->relocs, reloc_link) {
-                       err = eb_relocate_vma(eb, ev);
-                       if (err)
-                               break;
-               }
+       size = entry->relocation_count;
+       if (size == 0)
+               return 0;
  
-               flush = reloc_gpu_flush(&eb->reloc_cache);
-               if (!err)
-                       err = flush;
-       }
+       if (size > N_RELOC(ULONG_MAX))
+               return -EINVAL;
  
-       return err;
+       addr = u64_to_user_ptr(entry->relocs_ptr);
+       size *= sizeof(struct drm_i915_gem_relocation_entry);
+       if (!access_ok(addr, size))
+               return -EFAULT;
+       end = addr + size;
+       for (; addr < end; addr += PAGE_SIZE) {
+               int err = __get_user(c, addr);
+               if (err)
+                       return err;
+       }
+       return __get_user(c, end - 1);
  }
  
- static int eb_move_to_gpu(struct i915_execbuffer *eb)
+ static int eb_copy_relocations(const struct i915_execbuffer *eb)
  {
+       struct drm_i915_gem_relocation_entry *relocs;
        const unsigned int count = eb->buffer_count;
-       struct ww_acquire_ctx acquire;
        unsigned int i;
-       int err = 0;
-       ww_acquire_init(&acquire, &reservation_ww_class);
+       int err;
  
        for (i = 0; i < count; i++) {
-               struct eb_vma *ev = &eb->vma[i];
-               struct i915_vma *vma = ev->vma;
+               const unsigned int nreloc = eb->exec[i].relocation_count;
+               struct drm_i915_gem_relocation_entry __user *urelocs;
+               unsigned long size;
+               unsigned long copied;
  
-               err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
-               if (err == -EDEADLK) {
-                       GEM_BUG_ON(i == 0);
-                       do {
-                               int j = i - 1;
+               if (nreloc == 0)
+                       continue;
  
-                               ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
+               err = check_relocations(&eb->exec[i]);
+               if (err)
+                       goto err;
  
-                               swap(eb->vma[i],  eb->vma[j]);
-                       } while (--i);
+               urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+               size = nreloc * sizeof(*relocs);
  
-                       err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
-                                                              &acquire);
+               relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+               if (!relocs) {
+                       err = -ENOMEM;
+                       goto err;
                }
-               if (err)
-                       break;
+               /* copy_from_user is limited to < 4GiB */
+               copied = 0;
+               do {
+                       unsigned int len =
+                               min_t(u64, BIT_ULL(31), size - copied);
+                       if (__copy_from_user((char *)relocs + copied,
+                                            (char __user *)urelocs + copied,
+                                            len))
+                               goto end;
+                       copied += len;
+               } while (copied < size);
+               /*
+                * As we do not update the known relocation offsets after
+                * relocating (due to the complexities in lock handling),
+                * we need to mark them as invalid now so that we force the
+                * relocation processing next time. Just in case the target
+                * object is evicted and then rebound into its old
+                * presumed_offset before the next execbuffer - if that
+                * happened we would make the mistake of assuming that the
+                * relocations were valid.
+                */
+               if (!user_access_begin(urelocs, size))
+                       goto end;
+               for (copied = 0; copied < nreloc; copied++)
+                       unsafe_put_user(-1,
+                                       &urelocs[copied].presumed_offset,
+                                       end_user);
+               user_access_end();
+               eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+       }
+       return 0;
+ end_user:
+       user_access_end();
+ end:
+       kvfree(relocs);
+       err = -EFAULT;
+ err:
+       while (i--) {
+               relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+               if (eb->exec[i].relocation_count)
+                       kvfree(relocs);
+       }
+       return err;
+ }
+ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+ {
+       const unsigned int count = eb->buffer_count;
+       unsigned int i;
+       for (i = 0; i < count; i++) {
+               int err;
+               err = check_relocations(&eb->exec[i]);
+               if (err)
+                       return err;
+       }
+       return 0;
+ }
+ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+                                          struct i915_request *rq)
+ {
+       bool have_copy = false;
+       struct eb_vma *ev;
+       int err = 0;
+ repeat:
+       if (signal_pending(current)) {
+               err = -ERESTARTSYS;
+               goto out;
        }
-       ww_acquire_done(&acquire);
+       /* We may process another execbuffer during the unlock... */
+       eb_release_vmas(eb, false);
+       i915_gem_ww_ctx_fini(&eb->ww);
+       if (rq) {
+               /* nonblocking is always false */
+               if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0) {
+                       i915_request_put(rq);
+                       rq = NULL;
+                       err = -EINTR;
+                       goto err_relock;
+               }
+               i915_request_put(rq);
+               rq = NULL;
+       }
+       /*
+        * We take 3 passes through the slowpatch.
+        *
+        * 1 - we try to just prefault all the user relocation entries and
+        * then attempt to reuse the atomic pagefault disabled fast path again.
+        *
+        * 2 - we copy the user entries to a local buffer here outside of the
+        * local and allow ourselves to wait upon any rendering before
+        * relocations
+        *
+        * 3 - we already have a local copy of the relocation entries, but
+        * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+        */
+       if (!err) {
+               err = eb_prefault_relocations(eb);
+       } else if (!have_copy) {
+               err = eb_copy_relocations(eb);
+               have_copy = err == 0;
+       } else {
+               cond_resched();
+               err = 0;
+       }
+       if (!err)
+               flush_workqueue(eb->i915->mm.userptr_wq);
+ err_relock:
+       i915_gem_ww_ctx_init(&eb->ww, true);
+       if (err)
+               goto out;
+       /* reacquire the objects */
+ repeat_validate:
+       rq = eb_pin_engine(eb, false);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               rq = NULL;
+               goto err;
+       }
+       /* We didn't throttle, should be NULL */
+       GEM_WARN_ON(rq);
+       err = eb_validate_vmas(eb);
+       if (err)
+               goto err;
+       GEM_BUG_ON(!eb->batch);
+       list_for_each_entry(ev, &eb->relocs, reloc_link) {
+               if (!have_copy) {
+                       pagefault_disable();
+                       err = eb_relocate_vma(eb, ev);
+                       pagefault_enable();
+                       if (err)
+                               break;
+               } else {
+                       err = eb_relocate_vma_slow(eb, ev);
+                       if (err)
+                               break;
+               }
+       }
+       if (err == -EDEADLK)
+               goto err;
+       if (err && !have_copy)
+               goto repeat;
+       if (err)
+               goto err;
+       /* as last step, parse the command buffer */
+       err = eb_parse(eb);
+       if (err)
+               goto err;
+       /*
+        * Leave the user relocations as are, this is the painfully slow path,
+        * and we want to avoid the complication of dropping the lock whilst
+        * having buffers reserved in the aperture and so causing spurious
+        * ENOSPC for random operations.
+        */
+ err:
+       if (err == -EDEADLK) {
+               eb_release_vmas(eb, false);
+               err = i915_gem_ww_ctx_backoff(&eb->ww);
+               if (!err)
+                       goto repeat_validate;
+       }
+       if (err == -EAGAIN)
+               goto repeat;
+ out:
+       if (have_copy) {
+               const unsigned int count = eb->buffer_count;
+               unsigned int i;
+               for (i = 0; i < count; i++) {
+                       const struct drm_i915_gem_exec_object2 *entry =
+                               &eb->exec[i];
+                       struct drm_i915_gem_relocation_entry *relocs;
+                       if (!entry->relocation_count)
+                               continue;
+                       relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+                       kvfree(relocs);
+               }
+       }
+       if (rq)
+               i915_request_put(rq);
+       return err;
+ }
+ static int eb_relocate_parse(struct i915_execbuffer *eb)
+ {
+       int err;
+       struct i915_request *rq = NULL;
+       bool throttle = true;
+ retry:
+       rq = eb_pin_engine(eb, throttle);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               rq = NULL;
+               if (err != -EDEADLK)
+                       return err;
+               goto err;
+       }
+       if (rq) {
+               bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+               /* Need to drop all locks now for throttling, take slowpath */
+               err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+               if (err == -ETIME) {
+                       if (nonblock) {
+                               err = -EWOULDBLOCK;
+                               i915_request_put(rq);
+                               goto err;
+                       }
+                       goto slow;
+               }
+               i915_request_put(rq);
+               rq = NULL;
+       }
+       /* only throttle once, even if we didn't need to throttle */
+       throttle = false;
+       err = eb_validate_vmas(eb);
+       if (err == -EAGAIN)
+               goto slow;
+       else if (err)
+               goto err;
+       /* The objects are in their final locations, apply the relocations. */
+       if (eb->args->flags & __EXEC_HAS_RELOC) {
+               struct eb_vma *ev;
+               list_for_each_entry(ev, &eb->relocs, reloc_link) {
+                       err = eb_relocate_vma(eb, ev);
+                       if (err)
+                               break;
+               }
+               if (err == -EDEADLK)
+                       goto err;
+               else if (err)
+                       goto slow;
+       }
+       if (!err)
+               err = eb_parse(eb);
+ err:
+       if (err == -EDEADLK) {
+               eb_release_vmas(eb, false);
+               err = i915_gem_ww_ctx_backoff(&eb->ww);
+               if (!err)
+                       goto retry;
+       }
+       return err;
+ slow:
+       err = eb_relocate_parse_slow(eb, rq);
+       if (err)
+               /*
+                * If the user expects the execobject.offset and
+                * reloc.presumed_offset to be an exact match,
+                * as for using NO_RELOC, then we cannot update
+                * the execobject.offset until we have completed
+                * relocation.
+                */
+               eb->args->flags &= ~__EXEC_HAS_RELOC;
+       return err;
+ }
+ static int eb_move_to_gpu(struct i915_execbuffer *eb)
+ {
+       const unsigned int count = eb->buffer_count;
+       unsigned int i = count;
+       int err = 0;
  
        while (i--) {
                struct eb_vma *ev = &eb->vma[i];
  
                if (err == 0)
                        err = i915_vma_move_to_active(vma, eb->request, flags);
-               i915_vma_unlock(vma);
-               eb_unreserve_vma(ev);
        }
-       ww_acquire_fini(&acquire);
-       eb_vma_array_put(fetch_and_zero(&eb->array));
  
        if (unlikely(err))
                goto err_skip;
@@@ -1950,7 -2242,8 +2242,8 @@@ static int i915_reset_gen7_sol_offsets(
  }
  
  static struct i915_vma *
- shadow_batch_pin(struct drm_i915_gem_object *obj,
+ shadow_batch_pin(struct i915_execbuffer *eb,
+                struct drm_i915_gem_object *obj,
                 struct i915_address_space *vm,
                 unsigned int flags)
  {
        if (IS_ERR(vma))
                return vma;
  
-       err = i915_vma_pin(vma, 0, 0, flags);
+       err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
        if (err)
                return ERR_PTR(err);
  
@@@ -2013,7 -2306,7 +2306,7 @@@ __parser_mark_active(struct i915_vma *v
  {
        struct intel_gt_buffer_pool_node *node = vma->private;
  
-       return i915_active_ref(&node->active, tl, fence);
+       return i915_active_ref(&node->active, tl->fence_context, fence);
  }
  
  static int
@@@ -2077,36 -2370,26 +2370,26 @@@ static int eb_parse_pipeline(struct i91
        if (err)
                goto err_commit;
  
-       err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
-       if (err)
-               goto err_commit;
        err = dma_resv_reserve_shared(pw->batch->resv, 1);
        if (err)
-               goto err_commit_unlock;
+               goto err_commit;
  
        /* Wait for all writes (and relocs) into the batch to complete */
        err = i915_sw_fence_await_reservation(&pw->base.chain,
                                              pw->batch->resv, NULL, false,
                                              0, I915_FENCE_GFP);
        if (err < 0)
-               goto err_commit_unlock;
+               goto err_commit;
  
        /* Keep the batch alive and unwritten as we parse */
        dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
  
-       dma_resv_unlock(pw->batch->resv);
        /* Force execution to wait for completion of the parser */
-       dma_resv_lock(shadow->resv, NULL);
        dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-       dma_resv_unlock(shadow->resv);
  
        dma_fence_work_commit_imm(&pw->base);
        return 0;
  
- err_commit_unlock:
-       dma_resv_unlock(pw->batch->resv);
  err_commit:
        i915_sw_fence_set_error_once(&pw->base.chain, err);
        dma_fence_work_commit_imm(&pw->base);
@@@ -2121,16 -2404,33 +2404,33 @@@ err_free
        return err;
  }
  
+ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+ {
+       /*
+        * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+        * batch" bit. Hence we need to pin secure batches into the global gtt.
+        * hsw should have this fixed, but bdw mucks it up again. */
+       if (eb->batch_flags & I915_DISPATCH_SECURE)
+               return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+       return NULL;
+ }
  static int eb_parse(struct i915_execbuffer *eb)
  {
        struct drm_i915_private *i915 = eb->i915;
-       struct intel_gt_buffer_pool_node *pool;
-       struct i915_vma *shadow, *trampoline;
+       struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
+       struct i915_vma *shadow, *trampoline, *batch;
        unsigned int len;
        int err;
  
-       if (!eb_use_cmdparser(eb))
-               return 0;
+       if (!eb_use_cmdparser(eb)) {
+               batch = eb_dispatch_secure(eb, eb->batch->vma);
+               if (IS_ERR(batch))
+                       return PTR_ERR(batch);
+               goto secure_batch;
+       }
  
        len = eb->batch_len;
        if (!CMDPARSER_USES_GGTT(eb->i915)) {
                len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
        }
  
-       pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
-       if (IS_ERR(pool))
-               return PTR_ERR(pool);
+       if (!pool) {
+               pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
+               if (IS_ERR(pool))
+                       return PTR_ERR(pool);
+               eb->batch_pool = pool;
+       }
  
-       shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+       err = i915_gem_object_lock(pool->obj, &eb->ww);
+       if (err)
+               goto err;
+       shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
        if (IS_ERR(shadow)) {
                err = PTR_ERR(shadow);
                goto err;
        if (CMDPARSER_USES_GGTT(eb->i915)) {
                trampoline = shadow;
  
-               shadow = shadow_batch_pin(pool->obj,
+               shadow = shadow_batch_pin(eb, pool->obj,
                                          &eb->engine->gt->ggtt->vm,
                                          PIN_GLOBAL);
                if (IS_ERR(shadow)) {
                eb->batch_flags |= I915_DISPATCH_SECURE;
        }
  
+       batch = eb_dispatch_secure(eb, shadow);
+       if (IS_ERR(batch)) {
+               err = PTR_ERR(batch);
+               goto err_trampoline;
+       }
        err = eb_parse_pipeline(eb, shadow, trampoline);
        if (err)
-               goto err_trampoline;
+               goto err_unpin_batch;
  
-       eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
-       eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
        eb->batch = &eb->vma[eb->buffer_count++];
-       eb->vma[eb->buffer_count].vma = NULL;
+       eb->batch->vma = i915_vma_get(shadow);
+       eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
  
        eb->trampoline = trampoline;
        eb->batch_start_offset = 0;
  
+ secure_batch:
+       if (batch) {
+               eb->batch = &eb->vma[eb->buffer_count++];
+               eb->batch->flags = __EXEC_OBJECT_HAS_PIN;
+               eb->batch->vma = i915_vma_get(batch);
+       }
        return 0;
  
+ err_unpin_batch:
+       if (batch)
+               i915_vma_unpin(batch);
  err_trampoline:
        if (trampoline)
                i915_vma_unpin(trampoline);
  err_shadow:
        i915_vma_unpin(shadow);
  err:
-       intel_gt_buffer_pool_put(pool);
        return err;
  }
  
- static void
- add_to_client(struct i915_request *rq, struct drm_file *file)
- {
-       struct drm_i915_file_private *file_priv = file->driver_priv;
-       rq->file_priv = file_priv;
-       spin_lock(&file_priv->mm.lock);
-       list_add_tail(&rq->client_link, &file_priv->mm.request_list);
-       spin_unlock(&file_priv->mm.lock);
- }
  static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
  {
        int err;
@@@ -2293,7 -2601,7 +2601,7 @@@ static const enum intel_engine_id user_
        [I915_EXEC_VEBOX]       = VECS0
  };
  
- static struct i915_request *eb_throttle(struct intel_context *ce)
+ static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
  {
        struct intel_ring *ring = ce->ring;
        struct intel_timeline *tl = ce->timeline;
        return i915_request_get(rq);
  }
  
- static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
  {
+       struct intel_context *ce = eb->context;
        struct intel_timeline *tl;
-       struct i915_request *rq;
+       struct i915_request *rq = NULL;
        int err;
  
-       /*
-        * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-        * EIO if the GPU is already wedged.
-        */
-       err = intel_gt_terminally_wedged(ce->engine->gt);
-       if (err)
-               return err;
+       GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
  
        if (unlikely(intel_context_is_banned(ce)))
-               return -EIO;
+               return ERR_PTR(-EIO);
  
        /*
         * Pinning the contexts may generate requests in order to acquire
         * GGTT space, so do this first before we reserve a seqno for
         * ourselves.
         */
-       err = intel_context_pin(ce);
+       err = intel_context_pin_ww(ce, &eb->ww);
        if (err)
-               return err;
+               return ERR_PTR(err);
  
        /*
         * Take a local wakeref for preparing to dispatch the execbuf as
         */
        tl = intel_context_timeline_lock(ce);
        if (IS_ERR(tl)) {
-               err = PTR_ERR(tl);
-               goto err_unpin;
+               intel_context_unpin(ce);
+               return ERR_CAST(tl);
        }
  
        intel_context_enter(ce);
-       rq = eb_throttle(ce);
+       if (throttle)
+               rq = eb_throttle(eb, ce);
        intel_context_timeline_unlock(tl);
  
-       if (rq) {
-               bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-               long timeout;
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (nonblock)
-                       timeout = 0;
-               timeout = i915_request_wait(rq,
-                                           I915_WAIT_INTERRUPTIBLE,
-                                           timeout);
-               i915_request_put(rq);
-               if (timeout < 0) {
-                       err = nonblock ? -EWOULDBLOCK : timeout;
-                       goto err_exit;
-               }
-       }
-       eb->engine = ce->engine;
-       eb->context = ce;
-       return 0;
- err_exit:
-       mutex_lock(&tl->mutex);
-       intel_context_exit(ce);
-       intel_context_timeline_unlock(tl);
- err_unpin:
-       intel_context_unpin(ce);
-       return err;
+       eb->args->flags |= __EXEC_ENGINE_PINNED;
+       return rq;
  }
  
  static void eb_unpin_engine(struct i915_execbuffer *eb)
        struct intel_context *ce = eb->context;
        struct intel_timeline *tl = ce->timeline;
  
+       if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+               return;
+       eb->args->flags &= ~__EXEC_ENGINE_PINNED;
        mutex_lock(&tl->mutex);
        intel_context_exit(ce);
        mutex_unlock(&tl->mutex);
  }
  
  static unsigned int
- eb_select_legacy_ring(struct i915_execbuffer *eb,
-                     struct drm_file *file,
-                     struct drm_i915_gem_execbuffer2 *args)
+ eb_select_legacy_ring(struct i915_execbuffer *eb)
  {
        struct drm_i915_private *i915 = eb->i915;
+       struct drm_i915_gem_execbuffer2 *args = eb->args;
        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
  
        if (user_ring_id != I915_EXEC_BSD &&
                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
  
                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-                       bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+                       bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
                           bsd_idx <= I915_EXEC_BSD_RING2) {
                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
  }
  
  static int
- eb_pin_engine(struct i915_execbuffer *eb,
-             struct drm_file *file,
-             struct drm_i915_gem_execbuffer2 *args)
+ eb_select_engine(struct i915_execbuffer *eb)
  {
        struct intel_context *ce;
        unsigned int idx;
        int err;
  
        if (i915_gem_context_user_engines(eb->gem_context))
-               idx = args->flags & I915_EXEC_RING_MASK;
+               idx = eb->args->flags & I915_EXEC_RING_MASK;
        else
-               idx = eb_select_legacy_ring(eb, file, args);
+               idx = eb_select_legacy_ring(eb);
  
        ce = i915_gem_context_get_engine(eb->gem_context, idx);
        if (IS_ERR(ce))
                return PTR_ERR(ce);
  
-       err = __eb_pin_engine(eb, ce);
-       intel_context_put(ce);
+       intel_gt_pm_get(ce->engine->gt);
  
+       if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+               err = intel_context_alloc_state(ce);
+               if (err)
+                       goto err;
+       }
+       /*
+        * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+        * EIO if the GPU is already wedged.
+        */
+       err = intel_gt_terminally_wedged(ce->engine->gt);
+       if (err)
+               goto err;
+       eb->context = ce;
+       eb->engine = ce->engine;
+       /*
+        * Make sure engine pool stays alive even if we call intel_context_put
+        * during ww handling. The pool is destroyed when last pm reference
+        * is dropped, which breaks our -EDEADLK handling.
+        */
+       return err;
+ err:
+       intel_gt_pm_put(ce->engine->gt);
+       intel_context_put(ce);
        return err;
  }
  
+ static void
+ eb_put_engine(struct i915_execbuffer *eb)
+ {
+       intel_gt_pm_put(eb->engine->gt);
+       intel_context_put(eb->context);
+ }
  static void
  __free_fence_array(struct eb_fence *fences, unsigned int n)
  {
@@@ -2573,6 -2883,7 +2883,7 @@@ add_timeline_fence_array(struct i915_ex
  
                if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
                        DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+                       dma_fence_put(fence);
                        drm_syncobj_put(syncobj);
                        return err;
                }
@@@ -2860,6 -3171,10 +3171,10 @@@ i915_gem_do_execbuffer(struct drm_devic
                args->flags |= __EXEC_HAS_RELOC;
  
        eb.exec = exec;
+       eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+       eb.vma[0].vma = NULL;
+       eb.reloc_pool = eb.batch_pool = NULL;
+       eb.reloc_context = NULL;
  
        eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
        reloc_cache_init(&eb.reloc_cache, eb.i915);
        if (unlikely(err))
                goto err_destroy;
  
-       err = eb_pin_engine(&eb, file, args);
+       err = eb_select_engine(&eb);
        if (unlikely(err))
                goto err_context;
  
-       err = eb_relocate(&eb);
+       err = eb_lookup_vmas(&eb);
+       if (err) {
+               eb_release_vmas(&eb, true);
+               goto err_engine;
+       }
+       i915_gem_ww_ctx_init(&eb.ww, true);
+       err = eb_relocate_parse(&eb);
        if (err) {
                /*
                 * If the user expects the execobject.offset and
                goto err_vma;
        }
  
-       if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
-               drm_dbg(&i915->drm,
-                       "Attempting to use self-modifying batch buffer\n");
-               err = -EINVAL;
-               goto err_vma;
-       }
+       ww_acquire_done(&eb.ww.ctx);
  
-       if (range_overflows_t(u64,
-                             eb.batch_start_offset, eb.batch_len,
-                             eb.batch->vma->size)) {
-               drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
-               err = -EINVAL;
-               goto err_vma;
-       }
-       if (eb.batch_len == 0)
-               eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
-       err = eb_parse(&eb);
-       if (err)
-               goto err_vma;
-       /*
-        * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
-        * batch" bit. Hence we need to pin secure batches into the global gtt.
-        * hsw should have this fixed, but bdw mucks it up again. */
        batch = eb.batch->vma;
-       if (eb.batch_flags & I915_DISPATCH_SECURE) {
-               struct i915_vma *vma;
-               /*
-                * So on first glance it looks freaky that we pin the batch here
-                * outside of the reservation loop. But:
-                * - The batch is already pinned into the relevant ppgtt, so we
-                *   already have the backing storage fully allocated.
-                * - No other BO uses the global gtt (well contexts, but meh),
-                *   so we don't really have issues with multiple objects not
-                *   fitting due to fragmentation.
-                * So this is actually safe.
-                */
-               vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
-               if (IS_ERR(vma)) {
-                       err = PTR_ERR(vma);
-                       goto err_parse;
-               }
-               batch = vma;
-       }
  
        /* All GPU relocation batches must be submitted prior to the user rq */
        GEM_BUG_ON(eb.reloc_cache.rq);
        eb.request = i915_request_create(eb.context);
        if (IS_ERR(eb.request)) {
                err = PTR_ERR(eb.request);
-               goto err_batch_unpin;
+               goto err_vma;
        }
  
        if (in_fence) {
         * to explicitly hold another reference here.
         */
        eb.request->batch = batch;
-       if (batch->private)
-               intel_gt_buffer_pool_mark_active(batch->private, eb.request);
+       if (eb.batch_pool)
+               intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
  
        trace_i915_request_queue(eb.request, eb.batch_flags);
        err = eb_submit(&eb, batch);
  err_request:
-       add_to_client(eb.request, file);
        i915_request_get(eb.request);
        eb_request_add(&eb);
  
        }
        i915_request_put(eb.request);
  
- err_batch_unpin:
-       if (eb.batch_flags & I915_DISPATCH_SECURE)
-               i915_vma_unpin(batch);
- err_parse:
-       if (batch->private)
-               intel_gt_buffer_pool_put(batch->private);
  err_vma:
+       eb_release_vmas(&eb, true);
        if (eb.trampoline)
                i915_vma_unpin(eb.trampoline);
-       eb_unpin_engine(&eb);
+       WARN_ON(err == -EDEADLK);
+       i915_gem_ww_ctx_fini(&eb.ww);
+       if (eb.batch_pool)
+               intel_gt_buffer_pool_put(eb.batch_pool);
+       if (eb.reloc_pool)
+               intel_gt_buffer_pool_put(eb.reloc_pool);
+       if (eb.reloc_context)
+               intel_context_put(eb.reloc_context);
+ err_engine:
+       eb_put_engine(&eb);
  err_context:
        i915_gem_context_put(eb.gem_context);
  err_destroy:
@@@ -3089,7 -3371,7 +3371,7 @@@ err_ext
  
  static size_t eb_element_size(void)
  {
-       return sizeof(struct drm_i915_gem_exec_object2);
+       return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
  }
  
  static bool check_buffer_count(size_t count)
@@@ -3145,7 -3427,9 +3427,9 @@@ i915_gem_execbuffer_ioctl(struct drm_de
        /* Copy in the exec list from userland */
        exec_list = kvmalloc_array(count, sizeof(*exec_list),
                                   __GFP_NOWARN | GFP_KERNEL);
-       exec2_list = kvmalloc_array(count, eb_element_size(),
+       /* Allocate extra slots for use by the command parser */
+       exec2_list = kvmalloc_array(count + 2, eb_element_size(),
                                    __GFP_NOWARN | GFP_KERNEL);
        if (exec_list == NULL || exec2_list == NULL) {
                drm_dbg(&i915->drm,
@@@ -3222,7 -3506,8 +3506,8 @@@ i915_gem_execbuffer2_ioctl(struct drm_d
        if (err)
                return err;
  
-       exec2_list = kvmalloc_array(count, eb_element_size(),
+       /* Allocate extra slots for use by the command parser */
+       exec2_list = kvmalloc_array(count + 2, eb_element_size(),
                                    __GFP_NOWARN | GFP_KERNEL);
        if (exec2_list == NULL) {
                drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
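
Taken together, the execbuffer changes above bracket all object locking and pinning in a single ww context: eb_relocate_parse() acquires locks and handles -EDEADLK backoff internally, ww_acquire_done() then closes the acquire phase, and everything is released only after submission. A rough sketch of that control flow as it would sit inside i915_gem_execbuffer.c, with submit_request() as a hypothetical stand-in for request creation and submission:

static int execbuf_ww_phase(struct i915_execbuffer *eb)
{
        int err;

        i915_gem_ww_ctx_init(&eb->ww, true);

        err = eb_relocate_parse(eb);            /* locks, pins, relocates; backs off on -EDEADLK */
        if (!err) {
                ww_acquire_done(&eb->ww.ctx);   /* no further locks taken after this point */
                err = submit_request(eb);       /* hypothetical: build and queue the request */
        }

        eb_release_vmas(eb, true);
        WARN_ON(err == -EDEADLK);               /* backoff never escapes the parse step */
        i915_gem_ww_ctx_fini(&eb->ww);
        return err;
}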
index 753f82d87a3168e1b8a01afb86173e6ff70baf3c,548ed9fb427d31e809fd6634b34a529259df47c9..3d69e51f3e4df0a3002a14d5ebf1995b35fed9a0
@@@ -209,7 -209,7 +209,7 @@@ static vm_fault_t i915_error_to_vmf_fau
        switch (err) {
        default:
                WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err);
 -              /* fallthrough */
 +              fallthrough;
        case -EIO: /* shmemfs failure from swap device */
        case -EFAULT: /* purged object */
        case -ENODEV: /* bad object, how did you get here! */
@@@ -283,37 -283,46 +283,46 @@@ static vm_fault_t vm_fault_gtt(struct v
        struct intel_runtime_pm *rpm = &i915->runtime_pm;
        struct i915_ggtt *ggtt = &i915->ggtt;
        bool write = area->vm_flags & VM_WRITE;
+       struct i915_gem_ww_ctx ww;
        intel_wakeref_t wakeref;
        struct i915_vma *vma;
        pgoff_t page_offset;
        int srcu;
        int ret;
  
-       /* Sanity check that we allow writing into this object */
-       if (i915_gem_object_is_readonly(obj) && write)
-               return VM_FAULT_SIGBUS;
        /* We don't use vmf->pgoff since that has the fake offset */
        page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
  
        trace_i915_gem_object_fault(obj, page_offset, true, write);
  
-       ret = i915_gem_object_pin_pages(obj);
+       wakeref = intel_runtime_pm_get(rpm);
+       i915_gem_ww_ctx_init(&ww, true);
+ retry:
+       ret = i915_gem_object_lock(obj, &ww);
        if (ret)
-               goto err;
+               goto err_rpm;
  
-       wakeref = intel_runtime_pm_get(rpm);
+       /* Sanity check that we allow writing into this object */
+       if (i915_gem_object_is_readonly(obj) && write) {
+               ret = -EFAULT;
+               goto err_rpm;
+       }
  
-       ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+       ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err_rpm;
  
+       ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+       if (ret)
+               goto err_pages;
        /* Now pin it into the GTT as needed */
-       vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-                                      PIN_MAPPABLE |
-                                      PIN_NONBLOCK /* NOWARN */ |
-                                      PIN_NOEVICT);
-       if (IS_ERR(vma)) {
+       vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+                                         PIN_MAPPABLE |
+                                         PIN_NONBLOCK /* NOWARN */ |
+                                         PIN_NOEVICT);
+       if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
                /* Use a partial view if it is bigger than available space */
                struct i915_ggtt_view view =
                        compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
                 * all hope that the hardware is able to track future writes.
                 */
  
-               vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-               if (IS_ERR(vma)) {
+               vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+               if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
                        flags = PIN_MAPPABLE;
                        view.type = I915_GGTT_VIEW_PARTIAL;
-                       vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+                       vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
                }
  
                /* The entire mappable GGTT is pinned? Unexpected! */
@@@ -389,10 -398,16 +398,16 @@@ err_unpin
        __i915_vma_unpin(vma);
  err_reset:
        intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+ err_pages:
+       i915_gem_object_unpin_pages(obj);
  err_rpm:
+       if (ret == -EDEADLK) {
+               ret = i915_gem_ww_ctx_backoff(&ww);
+               if (!ret)
+                       goto retry;
+       }
+       i915_gem_ww_ctx_fini(&ww);
        intel_runtime_pm_put(rpm, wakeref);
-       i915_gem_object_unpin_pages(obj);
- err:
        return i915_error_to_vmf_fault(ret);
  }
  
index 9cf4ad78ece6ef6e926aabd01d9155e06b2bb9c4,afde1952c1192abb40ad49fb2a62fd8761a46443..d46db8d8f38e4e85154f85571ac732b656b50213
@@@ -110,20 -110,44 +110,44 @@@ i915_gem_object_put(struct drm_i915_gem
  
  #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
  
- static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+                                        struct i915_gem_ww_ctx *ww,
+                                        bool intr)
  {
-       dma_resv_lock(obj->base.resv, NULL);
+       int ret;
+       if (intr)
+               ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+       else
+               ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+       if (!ret && ww)
+               list_add_tail(&obj->obj_link, &ww->obj_list);
+       if (ret == -EALREADY)
+               ret = 0;
+       if (ret == -EDEADLK)
+               ww->contended = obj;
+       return ret;
  }
  
- static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+ static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+                                      struct i915_gem_ww_ctx *ww)
  {
-       return dma_resv_trylock(obj->base.resv);
+       return __i915_gem_object_lock(obj, ww, ww && ww->intr);
  }
  
- static inline int
- i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+                                                      struct i915_gem_ww_ctx *ww)
  {
-       return dma_resv_lock_interruptible(obj->base.resv, NULL);
+       WARN_ON(ww && !ww->intr);
+       return __i915_gem_object_lock(obj, ww, true);
+ }
+ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+ {
+       return dma_resv_trylock(obj->base.resv);
  }
  
  static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
@@@ -258,10 -282,6 +282,10 @@@ struct page 
  i915_gem_object_get_page(struct drm_i915_gem_object *obj,
                         unsigned int n);
  
 +struct page *
 +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
 +                             unsigned int n);
 +
  dma_addr_t
  i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
                                    unsigned long n,
@@@ -412,7 -432,6 +436,6 @@@ static inline voi
  i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
  {
        i915_gem_object_unpin_pages(obj);
-       i915_gem_object_unlock(obj);
  }
  
  static inline struct intel_engine_cs *
@@@ -435,6 -454,7 +458,7 @@@ i915_gem_object_last_write_engine(struc
  void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
                                         unsigned int cache_level);
  void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
  
  int __must_check
  i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
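
Because __i915_gem_object_lock() above treats -EALREADY as success and records the contended object on -EDEADLK, several objects can be locked under one ww context and released together at backoff or fini time. A small sketch under those assumptions (two_object_op() is a hypothetical placeholder; on -EDEADLK the caller is expected to back off and retry as in the fault-handler pattern earlier):

static int lock_pair(struct drm_i915_gem_object *a,
                     struct drm_i915_gem_object *b,
                     struct i915_gem_ww_ctx *ww)
{
        int err;

        err = i915_gem_object_lock(a, ww);      /* -EALREADY is swallowed */
        if (err)
                return err;                     /* -EDEADLK: ww->contended is set */

        err = i915_gem_object_lock(b, ww);
        if (err)
                return err;

        return two_object_op(a, b);             /* hypothetical */
}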
index 26087dd7978245a639a522bdba8b0e212f72e227,f231edd3fa3aafc694e501c649f638afcc973fca..5bfb5f7ed02c9aa1e78b0a7a0d06b6a0bc8447ab
@@@ -28,6 -28,7 +28,7 @@@
  
  #include "i915_drv.h"
  
+ #include "intel_breadcrumbs.h"
  #include "intel_context.h"
  #include "intel_engine.h"
  #include "intel_engine_pm.h"
@@@ -213,7 -214,7 +214,7 @@@ u32 intel_engine_context_size(struct in
                break;
        default:
                MISSING_CASE(class);
 -              /* fall through */
 +              fallthrough;
        case VIDEO_DECODE_CLASS:
        case VIDEO_ENHANCEMENT_CLASS:
        case COPY_ENGINE_CLASS:
@@@ -634,7 -635,7 +635,7 @@@ static int pin_ggtt_status_page(struct 
        else
                flags = PIN_HIGH;
  
-       return i915_ggtt_pin(vma, 0, flags);
+       return i915_ggtt_pin(vma, NULL, 0, flags);
  }
  
  static int init_status_page(struct intel_engine_cs *engine)
@@@ -700,8 -701,13 +701,13 @@@ static int engine_setup_common(struct i
        if (err)
                return err;
  
+       engine->breadcrumbs = intel_breadcrumbs_create(engine);
+       if (!engine->breadcrumbs) {
+               err = -ENOMEM;
+               goto err_status;
+       }
        intel_engine_init_active(engine, ENGINE_PHYSICAL);
-       intel_engine_init_breadcrumbs(engine);
        intel_engine_init_execlists(engine);
        intel_engine_init_cmd_parser(engine);
        intel_engine_init__pm(engine);
        intel_engine_init_ctx_wa(engine);
  
        return 0;
+ err_status:
+       cleanup_status_page(engine);
+       return err;
  }
  
  struct measure_breadcrumb {
@@@ -785,9 -795,11 +795,11 @@@ intel_engine_init_active(struct intel_e
  }
  
  static struct intel_context *
- create_kernel_context(struct intel_engine_cs *engine)
+ create_pinned_context(struct intel_engine_cs *engine,
+                     unsigned int hwsp,
+                     struct lock_class_key *key,
+                     const char *name)
  {
-       static struct lock_class_key kernel;
        struct intel_context *ce;
        int err;
  
                return ce;
  
        __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+       ce->timeline = page_pack_bits(NULL, hwsp);
  
        err = intel_context_pin(ce); /* perma-pin so it is always available */
        if (err) {
         * should we need to inject GPU operations during their request
         * construction.
         */
-       lockdep_set_class(&ce->timeline->mutex, &kernel);
+       lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
  
        return ce;
  }
  
+ static struct intel_context *
+ create_kernel_context(struct intel_engine_cs *engine)
+ {
+       static struct lock_class_key kernel;
+       return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
+                                    &kernel, "kernel_context");
+ }
  /**
   * intel_engines_init_common - initialize engine state which might require hw access
   * @engine: Engine to initialize.
@@@ -902,9 -924,9 +924,9 @@@ void intel_engine_cleanup_common(struc
        tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
  
        cleanup_status_page(engine);
+       intel_breadcrumbs_free(engine->breadcrumbs);
  
        intel_engine_fini_retire(engine);
-       intel_engine_fini_breadcrumbs(engine);
        intel_engine_cleanup_cmd_parser(engine);
  
        if (engine->default_state)
index 99e28d9021e8129928b24c9f7d4eee17855bf7a7,33a3f627ddb1cafb8cc3adf77b2ddcabbdf83e6a..81c05f551b9c885aa0566dc3cec607392b9b5060
@@@ -78,8 -78,6 +78,6 @@@ int i915_ggtt_init_hw(struct drm_i915_p
  {
        int ret;
  
-       stash_init(&i915->mm.wc_stash);
        /*
         * Note that we use page colouring to enforce a guard page at the
         * end of the address space. This is required as the CS may prefetch
@@@ -232,7 -230,7 +230,7 @@@ static void gen8_ggtt_insert_entries(st
  
        /* Fill the allocated but "unused" space beyond the end of the buffer */
        while (gte < end)
-               gen8_set_pte(gte++, vm->scratch[0].encode);
+               gen8_set_pte(gte++, vm->scratch[0]->encode);
  
        /*
         * We want to flush the TLBs only after we're certain all the PTE
@@@ -283,7 -281,7 +281,7 @@@ static void gen6_ggtt_insert_entries(st
  
        /* Fill the allocated but "unused" space beyond the end of the buffer */
        while (gte < end)
-               iowrite32(vm->scratch[0].encode, gte++);
+               iowrite32(vm->scratch[0]->encode, gte++);
  
        /*
         * We want to flush the TLBs only after we're certain all the PTE
@@@ -303,7 -301,7 +301,7 @@@ static void gen8_ggtt_clear_range(struc
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-       const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+       const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
        gen8_pte_t __iomem *gtt_base =
                (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@@@ -401,7 -399,7 +399,7 @@@ static void gen6_ggtt_clear_range(struc
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;
  
-       scratch_pte = vm->scratch[0].encode;
+       scratch_pte = vm->scratch[0]->encode;
        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
  }
@@@ -436,16 -434,17 +434,17 @@@ static void i915_ggtt_clear_range(struc
        intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
  }
  
- static int ggtt_bind_vma(struct i915_address_space *vm,
-                        struct i915_vma *vma,
-                        enum i915_cache_level cache_level,
-                        u32 flags)
+ static void ggtt_bind_vma(struct i915_address_space *vm,
+                         struct i915_vm_pt_stash *stash,
+                         struct i915_vma *vma,
+                         enum i915_cache_level cache_level,
+                         u32 flags)
  {
        struct drm_i915_gem_object *obj = vma->obj;
        u32 pte_flags;
  
        if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
-               return 0;
+               return;
  
        /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
        pte_flags = 0;
  
        vm->insert_entries(vm, vma, cache_level, pte_flags);
        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-       return 0;
  }
  
  static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@@@ -568,31 -565,25 +565,25 @@@ err
        return ret;
  }
  
- static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
-                                struct i915_vma *vma,
-                                enum i915_cache_level cache_level,
-                                u32 flags)
+ static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
+                                 struct i915_vm_pt_stash *stash,
+                                 struct i915_vma *vma,
+                                 enum i915_cache_level cache_level,
+                                 u32 flags)
  {
        u32 pte_flags;
-       int ret;
  
        /* Currently applicable only to VLV */
        pte_flags = 0;
        if (i915_gem_object_is_readonly(vma->obj))
                pte_flags |= PTE_READ_ONLY;
  
-       if (flags & I915_VMA_LOCAL_BIND) {
-               struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
-               ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
-               if (ret)
-                       return ret;
-       }
+       if (flags & I915_VMA_LOCAL_BIND)
+               ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
+                              stash, vma, cache_level, flags);
  
        if (flags & I915_VMA_GLOBAL_BIND)
                vm->insert_entries(vm, vma, cache_level, pte_flags);
-       return 0;
  }
  
  static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
  
  static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
  {
+       struct i915_vm_pt_stash stash = {};
        struct i915_ppgtt *ppgtt;
        int err;
  
                goto err_ppgtt;
        }
  
+       err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
+       if (err)
+               goto err_ppgtt;
+       err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
+       if (err)
+               goto err_stash;
        /*
         * Note we only pre-allocate as far as the end of the global
         * GTT. On 48b / 4-level page-tables, the difference is very,
         * very significant! We have to preallocate as GVT/vgpu does
         * not like the page directory disappearing.
         */
-       err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
-       if (err)
-               goto err_ppgtt;
+       ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
  
        ggtt->alias = ppgtt;
        ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
        GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
        ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
  
+       i915_vm_free_pt_stash(&ppgtt->vm, &stash);
        return 0;
  
+ err_stash:
+       i915_vm_free_pt_stash(&ppgtt->vm, &stash);
  err_ppgtt:
        i915_vm_put(&ppgtt->vm);
        return err;
@@@ -715,18 -716,11 +716,11 @@@ static void ggtt_cleanup_hw(struct i915
  void i915_ggtt_driver_release(struct drm_i915_private *i915)
  {
        struct i915_ggtt *ggtt = &i915->ggtt;
-       struct pagevec *pvec;
  
        fini_aliasing_ppgtt(ggtt);
  
        intel_ggtt_fini_fences(ggtt);
        ggtt_cleanup_hw(ggtt);
-       pvec = &i915->mm.wc_stash.pvec;
-       if (pvec->nr) {
-               set_pages_array_wb(pvec->pages, pvec->nr);
-               __pagevec_release(pvec);
-       }
  }
  
  static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@@ -789,7 -783,7 +783,7 @@@ static int ggtt_probe_common(struct i91
                return -ENOMEM;
        }
  
-       ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+       ret = setup_scratch_page(&ggtt->vm);
        if (ret) {
                drm_err(&i915->drm, "Scratch setup failed\n");
                /* iounmap will also get called at remove, but meh */
                return ret;
        }
  
-       ggtt->vm.scratch[0].encode =
-               ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+       ggtt->vm.scratch[0]->encode =
+               ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
                                    I915_CACHE_NONE, 0);
  
        return 0;
@@@ -824,7 -818,7 +818,7 @@@ static void gen6_gmch_remove(struct i91
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  
        iounmap(ggtt->gsm);
-       cleanup_scratch_page(vm);
+       free_scratch(vm);
  }
  
  static struct resource pci_resource(struct pci_dev *pdev, int bar)
@@@ -852,6 -846,8 +846,8 @@@ static int gen8_gmch_probe(struct i915_
        else
                size = gen8_get_total_gtt_size(snb_gmch_ctl);
  
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
        ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
        ggtt->vm.cleanup = gen6_gmch_remove;
        ggtt->vm.insert_page = gen8_ggtt_insert_page;
@@@ -1000,6 -996,8 +996,8 @@@ static int gen6_gmch_probe(struct i915_
        size = gen6_get_total_gtt_size(snb_gmch_ctl);
        ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
  
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
        ggtt->vm.clear_range = nop_clear_range;
        if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
                ggtt->vm.clear_range = gen6_ggtt_clear_range;
@@@ -1050,6 -1048,8 +1048,8 @@@ static int i915_gmch_probe(struct i915_
        ggtt->gmadr =
                (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
  
+       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
        ggtt->do_idle_maps = needs_idle_maps(i915);
        ggtt->vm.insert_page = i915_ggtt_insert_page;
        ggtt->vm.insert_entries = i915_ggtt_insert_entries;
@@@ -1165,11 -1165,6 +1165,6 @@@ void i915_ggtt_disable_guc(struct i915_
        ggtt->invalidate(ggtt);
  }
  
- static unsigned int clear_bind(struct i915_vma *vma)
- {
-       return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);
- }
  void i915_ggtt_resume(struct i915_ggtt *ggtt)
  {
        struct i915_vma *vma;
        /* clflush objects bound into the GGTT and rebind them. */
        list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;
-               unsigned int was_bound = clear_bind(vma);
+               unsigned int was_bound =
+                       atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
  
-               WARN_ON(i915_vma_bind(vma,
-                                     obj ? obj->cache_level : 0,
-                                     was_bound, NULL));
+               GEM_BUG_ON(!was_bound);
+               vma->ops->bind_vma(&ggtt->vm, NULL, vma,
+                                  obj ? obj->cache_level : 0,
+                                  was_bound);
                if (obj) { /* only used during resume => exclusive access */
                        flush |= fetch_and_zero(&obj->write_domain);
                        obj->read_domains |= I915_GEM_DOMAIN_GTT;
@@@ -1437,7 -1434,7 +1434,7 @@@ i915_get_ggtt_vma_pages(struct i915_vm
        switch (vma->ggtt_view.type) {
        default:
                GEM_BUG_ON(vma->ggtt_view.type);
 -              /* fall through */
 +              fallthrough;
        case I915_GGTT_VIEW_NORMAL:
                vma->pages = vma->obj->mm.pages;
                return 0;
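
With ggtt_bind_vma() and aliasing_gtt_bind_vma() now returning void, binding can no longer allocate page tables on demand; callers preallocate through a stash first, much as init_aliasing_ppgtt() does above. A condensed sketch of that sequence, using only the helpers visible in this hunk:

static int prealloc_va_range(struct i915_address_space *vm, u64 size)
{
        struct i915_vm_pt_stash stash = {};
        int err;

        err = i915_vm_alloc_pt_stash(vm, &stash, size);
        if (err)
                return err;

        err = i915_vm_pin_pt_stash(vm, &stash);
        if (!err)
                /* cannot fail: all page-table memory comes from the stash */
                vm->allocate_va_range(vm, &stash, 0, size);

        i915_vm_free_pt_stash(vm, &stash);      /* releases whatever was not consumed */
        return err;
}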
index 898593ca48898d9c543be2c691ee4c3cab8a2f0e,1ca1bac81cf61a75726f1045fe5068eed957f983..16b48e72c36910e846a8239dbaf8ee9ef8ef965a
@@@ -32,6 -32,7 +32,7 @@@
  #include "gen6_ppgtt.h"
  #include "gen7_renderclear.h"
  #include "i915_drv.h"
+ #include "intel_breadcrumbs.h"
  #include "intel_context.h"
  #include "intel_gt.h"
  #include "intel_reset.h"
@@@ -100,7 -101,7 +101,7 @@@ static void set_hwsp(struct intel_engin
                 */
                default:
                        GEM_BUG_ON(engine->id);
 -                      /* fallthrough */
 +                      fallthrough;
                case RCS0:
                        hwsp = RENDER_HWS_PGA_GEN7;
                        break;
@@@ -201,16 -202,18 +202,18 @@@ static struct i915_address_space *vm_al
        return vm;
  }
  
+ static u32 pp_dir(struct i915_address_space *vm)
+ {
+       return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
+ }
  static void set_pp_dir(struct intel_engine_cs *engine)
  {
        struct i915_address_space *vm = vm_alias(engine->gt->vm);
  
        if (vm) {
-               struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
                ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
-               ENGINE_WRITE(engine, RING_PP_DIR_BASE,
-                            px_base(ppgtt->pd)->ggtt_offset << 10);
+               ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm));
        }
  }
  
@@@ -255,7 -258,7 +258,7 @@@ static int xcs_resume(struct intel_engi
        else
                ring_setup_status_page(engine);
  
-       intel_engine_reset_breadcrumbs(engine);
+       intel_breadcrumbs_reset(engine->breadcrumbs);
  
        /* Enforce ordering by reading HEAD register back */
        ENGINE_POSTING_READ(engine, RING_HEAD);
@@@ -474,14 -477,16 +477,16 @@@ static void ring_context_destroy(struc
        intel_context_free(ce);
  }
  
- static int __context_pin_ppgtt(struct intel_context *ce)
+ static int ring_context_pre_pin(struct intel_context *ce,
+                               struct i915_gem_ww_ctx *ww,
+                               void **unused)
  {
        struct i915_address_space *vm;
        int err = 0;
  
        vm = vm_alias(ce->vm);
        if (vm)
-               err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
+               err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
  
        return err;
  }
@@@ -496,6 -501,10 +501,10 @@@ static void __context_unpin_ppgtt(struc
  }
  
  static void ring_context_unpin(struct intel_context *ce)
+ {
+ }
+ static void ring_context_post_unpin(struct intel_context *ce)
  {
        __context_unpin_ppgtt(ce);
  }
@@@ -584,9 -593,9 +593,9 @@@ static int ring_context_alloc(struct in
        return 0;
  }
  
- static int ring_context_pin(struct intel_context *ce)
+ static int ring_context_pin(struct intel_context *ce, void *unused)
  {
-       return __context_pin_ppgtt(ce);
+       return 0;
  }
  
  static void ring_context_reset(struct intel_context *ce)
  static const struct intel_context_ops ring_context_ops = {
        .alloc = ring_context_alloc,
  
+       .pre_pin = ring_context_pre_pin,
        .pin = ring_context_pin,
        .unpin = ring_context_unpin,
+       .post_unpin = ring_context_post_unpin,
  
        .enter = intel_context_enter_engine,
        .exit = intel_context_exit_engine,
  };
  
  static int load_pd_dir(struct i915_request *rq,
-                      const struct i915_ppgtt *ppgtt,
+                      struct i915_address_space *vm,
                       u32 valid)
  {
        const struct intel_engine_cs * const engine = rq->engine;
  
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-       *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
+       *cs++ = pp_dir(vm);
  
        /* Stall until the page table load is complete? */
        *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@@@ -826,7 -837,7 +837,7 @@@ static int switch_mm(struct i915_reques
         * post-sync op, this extra pass appears vital before a
         * mm switch!
         */
-       ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+       ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
        if (ret)
                return ret;
  
@@@ -1250,14 -1261,15 +1261,15 @@@ int intel_ring_submission_setup(struct 
                return -ENODEV;
        }
  
-       timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
+       timeline = intel_timeline_create_from_engine(engine,
+                                                    I915_GEM_HWS_SEQNO_ADDR);
        if (IS_ERR(timeline)) {
                err = PTR_ERR(timeline);
                goto err;
        }
        GEM_BUG_ON(timeline->has_initial_breadcrumb);
  
-       err = intel_timeline_pin(timeline);
+       err = intel_timeline_pin(timeline, NULL);
        if (err)
                goto err_timeline;
  
                goto err_timeline_unpin;
        }
  
-       err = intel_ring_pin(ring);
+       err = intel_ring_pin(ring, NULL);
        if (err)
                goto err_ring;
  