drivers/gpu/drm/i915/gt/intel_timeline.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2016-2018 Intel Corporation
   4  */
   5
   6 #include <drm/drm_cache.h>
   7
   8 #include "gem/i915_gem_internal.h"
   9
  10 #include "i915_active.h"
  11 #include "i915_drv.h"
  12 #include "i915_syncmap.h"
  13 #include "intel_gt.h"
  14 #include "intel_ring.h"
  15 #include "intel_timeline.h"
  16
/*
 * Number of bytes each timeline's seqno slot occupies in its HWSP page;
 * the slot is cleared/flushed with this size and the offset is advanced
 * by this amount when a new slot is selected.
 */
#define TIMELINE_SEQNO_BYTES 8
  18
  19 static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
  20 {
  21         struct drm_i915_private *i915 = gt->i915;
  22         struct drm_i915_gem_object *obj;
  23         struct i915_vma *vma;
  24
  25         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
  26         if (IS_ERR(obj))
  27                 return ERR_CAST(obj);
  28
  29         i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
  30
  31         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
  32         if (IS_ERR(vma))
  33                 i915_gem_object_put(obj);
  34
  35         return vma;
  36 }
  37
  38 static void __timeline_retire(struct i915_active *active)
  39 {
  40         struct intel_timeline *tl =
  41                 container_of(active, typeof(*tl), active);
  42
  43         i915_vma_unpin(tl->hwsp_ggtt);
  44         intel_timeline_put(tl);
  45 }
  46
  47 static int __timeline_active(struct i915_active *active)
  48 {
  49         struct intel_timeline *tl =
  50                 container_of(active, typeof(*tl), active);
  51
  52         __i915_vma_pin(tl->hwsp_ggtt);
  53         intel_timeline_get(tl);
  54         return 0;
  55 }
  56
  57 I915_SELFTEST_EXPORT int
  58 intel_timeline_pin_map(struct intel_timeline *timeline)
  59 {
  60         struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
  61         u32 ofs = offset_in_page(timeline->hwsp_offset);
  62         void *vaddr;
  63
  64         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
  65         if (IS_ERR(vaddr))
  66                 return PTR_ERR(vaddr);
  67
  68         timeline->hwsp_map = vaddr;
  69         timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
  70         drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
  71
  72         return 0;
  73 }
  74
  75 static int intel_timeline_init(struct intel_timeline *timeline,
  76                                struct intel_gt *gt,
  77                                struct i915_vma *hwsp,
  78                                unsigned int offset)
  79 {
  80         kref_init(&timeline->kref);
  81         atomic_set(&timeline->pin_count, 0);
  82
  83         timeline->gt = gt;
  84
  85         if (hwsp) {
  86                 timeline->hwsp_offset = offset;
  87                 timeline->hwsp_ggtt = i915_vma_get(hwsp);
  88         } else {
  89                 timeline->has_initial_breadcrumb = true;
  90                 hwsp = hwsp_alloc(gt);
  91                 if (IS_ERR(hwsp))
  92                         return PTR_ERR(hwsp);
  93                 timeline->hwsp_ggtt = hwsp;
  94         }
  95
  96         timeline->hwsp_map = NULL;
  97         timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;
  98
  99         GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
 100
 101         timeline->fence_context = dma_fence_context_alloc(1);
 102
 103         mutex_init(&timeline->mutex);
 104
 105         INIT_ACTIVE_FENCE(&timeline->last_request);
 106         INIT_LIST_HEAD(&timeline->requests);
 107
 108         i915_syncmap_init(&timeline->sync);
 109         i915_active_init(&timeline->active, __timeline_active,
 110                          __timeline_retire, 0);
 111
 112         return 0;
 113 }
 114
 115 void intel_gt_init_timelines(struct intel_gt *gt)
 116 {
 117         struct intel_gt_timelines *timelines = &gt->timelines;
 118
 119         spin_lock_init(&timelines->lock);
 120         INIT_LIST_HEAD(&timelines->active_list);
 121 }
 122
 123 static void intel_timeline_fini(struct rcu_head *rcu)
 124 {
 125         struct intel_timeline *timeline =
 126                 container_of(rcu, struct intel_timeline, rcu);
 127
 128         if (timeline->hwsp_map)
 129                 i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
 130
 131         i915_vma_put(timeline->hwsp_ggtt);
 132         i915_active_fini(&timeline->active);
 133
 134         /*
 135          * A small race exists between intel_gt_retire_requests_timeout and
 136          * intel_timeline_exit which could result in the syncmap not getting
 137          * free'd. Rather than work to hard to seal this race, simply cleanup
 138          * the syncmap on fini.
 139          */
 140         i915_syncmap_free(&timeline->sync);
 141
 142         kfree(timeline);
 143 }
 144
 145 struct intel_timeline *
 146 __intel_timeline_create(struct intel_gt *gt,
 147                         struct i915_vma *global_hwsp,
 148                         unsigned int offset)
 149 {
 150         struct intel_timeline *timeline;
 151         int err;
 152
 153         timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
 154         if (!timeline)
 155                 return ERR_PTR(-ENOMEM);
 156
 157         err = intel_timeline_init(timeline, gt, global_hwsp, offset);
 158         if (err) {
 159                 kfree(timeline);
 160                 return ERR_PTR(err);
 161         }
 162
 163         return timeline;
 164 }
 165
 166 struct intel_timeline *
 167 intel_timeline_create_from_engine(struct intel_engine_cs *engine,
 168                                   unsigned int offset)
 169 {
 170         struct i915_vma *hwsp = engine->status_page.vma;
 171         struct intel_timeline *tl;
 172
 173         tl = __intel_timeline_create(engine->gt, hwsp, offset);
 174         if (IS_ERR(tl))
 175                 return tl;
 176
 177         /* Borrow a nearby lock; we only create these timelines during init */
 178         mutex_lock(&hwsp->vm->mutex);
 179         list_add_tail(&tl->engine_link, &engine->status_page.timelines);
 180         mutex_unlock(&hwsp->vm->mutex);
 181
 182         return tl;
 183 }
 184
 185 void __intel_timeline_pin(struct intel_timeline *tl)
 186 {
 187         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 188         atomic_inc(&tl->pin_count);
 189 }
 190
 191 int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 192 {
 193         int err;
 194
 195         if (atomic_add_unless(&tl->pin_count, 1, 0))
 196                 return 0;
 197
 198         if (!tl->hwsp_map) {
 199                 err = intel_timeline_pin_map(tl);
 200                 if (err)
 201                         return err;
 202         }
 203
 204         err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
 205         if (err)
 206                 return err;
 207
 208         tl->hwsp_offset =
 209                 i915_ggtt_offset(tl->hwsp_ggtt) +
 210                 offset_in_page(tl->hwsp_offset);
 211         GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 212                  tl->fence_context, tl->hwsp_offset);
 213
 214         i915_active_acquire(&tl->active);
 215         if (atomic_fetch_inc(&tl->pin_count)) {
 216                 i915_active_release(&tl->active);
 217                 __i915_vma_unpin(tl->hwsp_ggtt);
 218         }
 219
 220         return 0;
 221 }
 222
 223 void intel_timeline_reset_seqno(const struct intel_timeline *tl)
 224 {
 225         u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
 226         /* Must be pinned to be writable, and no requests in flight. */
 227         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 228
 229         memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
 230         WRITE_ONCE(*hwsp_seqno, tl->seqno);
 231         drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
 232 }
 233
 234 void intel_timeline_enter(struct intel_timeline *tl)
 235 {
 236         struct intel_gt_timelines *timelines = &tl->gt->timelines;
 237
 238         /*
 239          * Pretend we are serialised by the timeline->mutex.
 240          *
 241          * While generally true, there are a few exceptions to the rule
 242          * for the engine->kernel_context being used to manage power
 243          * transitions. As the engine_park may be called from under any
 244          * timeline, it uses the power mutex as a global serialisation
 245          * lock to prevent any other request entering its timeline.
 246          *
 247          * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
 248          *
 249          * However, intel_gt_retire_request() does not know which engine
 250          * it is retiring along and so cannot partake in the engine-pm
 251          * barrier, and there we use the tl->active_count as a means to
 252          * pin the timeline in the active_list while the locks are dropped.
 253          * Ergo, as that is outside of the engine-pm barrier, we need to
 254          * use atomic to manipulate tl->active_count.
 255          */
 256         lockdep_assert_held(&tl->mutex);
 257
 258         if (atomic_add_unless(&tl->active_count, 1, 0))
 259                 return;
 260
 261         spin_lock(&timelines->lock);
 262         if (!atomic_fetch_inc(&tl->active_count)) {
 263                 /*
 264                  * The HWSP is volatile, and may have been lost while inactive,
 265                  * e.g. across suspend/resume. Be paranoid, and ensure that
 266                  * the HWSP value matches our seqno so we don't proclaim
 267                  * the next request as already complete.
 268                  */
 269                 intel_timeline_reset_seqno(tl);
 270                 list_add_tail(&tl->link, &timelines->active_list);
 271         }
 272         spin_unlock(&timelines->lock);
 273 }
 274
 275 void intel_timeline_exit(struct intel_timeline *tl)
 276 {
 277         struct intel_gt_timelines *timelines = &tl->gt->timelines;
 278
 279         /* See intel_timeline_enter() */
 280         lockdep_assert_held(&tl->mutex);
 281
 282         GEM_BUG_ON(!atomic_read(&tl->active_count));
 283         if (atomic_add_unless(&tl->active_count, -1, 1))
 284                 return;
 285
 286         spin_lock(&timelines->lock);
 287         if (atomic_dec_and_test(&tl->active_count))
 288                 list_del(&tl->link);
 289         spin_unlock(&timelines->lock);
 290
 291         /*
 292          * Since this timeline is idle, all bariers upon which we were waiting
 293          * must also be complete and so we can discard the last used barriers
 294          * without loss of information.
 295          */
 296         i915_syncmap_free(&tl->sync);
 297 }
 298
 299 static u32 timeline_advance(struct intel_timeline *tl)
 300 {
 301         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 302         GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
 303
 304         return tl->seqno += 1 + tl->has_initial_breadcrumb;
 305 }
 306
 307 static noinline int
 308 __intel_timeline_get_seqno(struct intel_timeline *tl,
 309                            u32 *seqno)
 310 {
 311         u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);
 312
 313         /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 314         if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
 315                 next_ofs = offset_in_page(next_ofs + BIT(5));
 316
 317         tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
 318         tl->hwsp_seqno = tl->hwsp_map + next_ofs;
 319         intel_timeline_reset_seqno(tl);
 320
 321         *seqno = timeline_advance(tl);
 322         GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
 323         return 0;
 324 }
 325
 326 int intel_timeline_get_seqno(struct intel_timeline *tl,
 327                              struct i915_request *rq,
 328                              u32 *seqno)
 329 {
 330         *seqno = timeline_advance(tl);
 331
 332         /* Replace the HWSP on wraparound for HW semaphores */
 333         if (unlikely(!*seqno && tl->has_initial_breadcrumb))
 334                 return __intel_timeline_get_seqno(tl, seqno);
 335
 336         return 0;
 337 }
 338
 339 int intel_timeline_read_hwsp(struct i915_request *from,
 340                              struct i915_request *to,
 341                              u32 *hwsp)
 342 {
 343         struct intel_timeline *tl;
 344         int err;
 345
 346         rcu_read_lock();
 347         tl = rcu_dereference(from->timeline);
 348         if (i915_request_signaled(from) ||
 349             !i915_active_acquire_if_busy(&tl->active))
 350                 tl = NULL;
 351
 352         if (tl) {
 353                 /* hwsp_offset may wraparound, so use from->hwsp_seqno */
 354                 *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
 355                         offset_in_page(from->hwsp_seqno);
 356         }
 357
 358         /* ensure we wait on the right request, if not, we completed */
 359         if (tl && __i915_request_is_complete(from)) {
 360                 i915_active_release(&tl->active);
 361                 tl = NULL;
 362         }
 363         rcu_read_unlock();
 364
 365         if (!tl)
 366                 return 1;
 367
 368         /* Can't do semaphore waits on kernel context */
 369         if (!tl->has_initial_breadcrumb) {
 370                 err = -EINVAL;
 371                 goto out;
 372         }
 373
 374         err = i915_active_add_request(&tl->active, to);
 375
 376 out:
 377         i915_active_release(&tl->active);
 378         return err;
 379 }
 380
 381 void intel_timeline_unpin(struct intel_timeline *tl)
 382 {
 383         GEM_BUG_ON(!atomic_read(&tl->pin_count));
 384         if (!atomic_dec_and_test(&tl->pin_count))
 385                 return;
 386
 387         i915_active_release(&tl->active);
 388         __i915_vma_unpin(tl->hwsp_ggtt);
 389 }
 390
 391 void __intel_timeline_free(struct kref *kref)
 392 {
 393         struct intel_timeline *timeline =
 394                 container_of(kref, typeof(*timeline), kref);
 395
 396         GEM_BUG_ON(atomic_read(&timeline->pin_count));
 397         GEM_BUG_ON(!list_empty(&timeline->requests));
 398         GEM_BUG_ON(timeline->retire);
 399
 400         call_rcu(&timeline->rcu, intel_timeline_fini);
 401 }
 402
 403 void intel_gt_fini_timelines(struct intel_gt *gt)
 404 {
 405         struct intel_gt_timelines *timelines = &gt->timelines;
 406
 407         GEM_BUG_ON(!list_empty(&timelines->active_list));
 408 }
 409
 410 void intel_gt_show_timelines(struct intel_gt *gt,
 411                              struct drm_printer *m,
 412                              void (*show_request)(struct drm_printer *m,
 413                                                   const struct i915_request *rq,
 414                                                   const char *prefix,
 415                                                   int indent))
 416 {
 417         struct intel_gt_timelines *timelines = &gt->timelines;
 418         struct intel_timeline *tl, *tn;
 419         LIST_HEAD(free);
 420
 421         spin_lock(&timelines->lock);
 422         list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 423                 unsigned long count, ready, inflight;
 424                 struct i915_request *rq, *rn;
 425                 struct dma_fence *fence;
 426
 427                 if (!mutex_trylock(&tl->mutex)) {
 428                         drm_printf(m, "Timeline %llx: busy; skipping\n",
 429                                    tl->fence_context);
 430                         continue;
 431                 }
 432
 433                 intel_timeline_get(tl);
 434                 GEM_BUG_ON(!atomic_read(&tl->active_count));
 435                 atomic_inc(&tl->active_count); /* pin the list element */
 436                 spin_unlock(&timelines->lock);
 437
 438                 count = 0;
 439                 ready = 0;
 440                 inflight = 0;
 441                 list_for_each_entry_safe(rq, rn, &tl->requests, link) {
 442                         if (i915_request_completed(rq))
 443                                 continue;
 444
 445                         count++;
 446                         if (i915_request_is_ready(rq))
 447                                 ready++;
 448                         if (i915_request_is_active(rq))
 449                                 inflight++;
 450                 }
 451
 452                 drm_printf(m, "Timeline %llx: { ", tl->fence_context);
 453                 drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
 454                            count, ready, inflight);
 455                 drm_printf(m, ", seqno: { current: %d, last: %d }",
 456                            *tl->hwsp_seqno, tl->seqno);
 457                 fence = i915_active_fence_get(&tl->last_request);
 458                 if (fence) {
 459                         drm_printf(m, ", engine: %s",
 460                                    to_request(fence)->engine->name);
 461                         dma_fence_put(fence);
 462                 }
 463                 drm_printf(m, " }\n");
 464
 465                 if (show_request) {
 466                         list_for_each_entry_safe(rq, rn, &tl->requests, link)
 467                                 show_request(m, rq, "", 2);
 468                 }
 469
 470                 mutex_unlock(&tl->mutex);
 471                 spin_lock(&timelines->lock);
 472
 473                 /* Resume list iteration after reacquiring spinlock */
 474                 list_safe_reset_next(tl, tn, link);
 475                 if (atomic_dec_and_test(&tl->active_count))
 476                         list_del(&tl->link);
 477
 478                 /* Defer the final release to after the spinlock */
 479                 if (refcount_dec_and_test(&tl->kref.refcount)) {
 480                         GEM_BUG_ON(atomic_read(&tl->active_count));
 481                         list_add(&tl->link, &free);
 482                 }
 483         }
 484         spin_unlock(&timelines->lock);
 485
 486         list_for_each_entry_safe(tl, tn, &free, link)
 487                 __intel_timeline_free(&tl->kref);
 488 }
 489
 490 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 491 #include "gt/selftests/mock_timeline.c"
 492 #include "gt/selftest_timeline.c"
 493 #endif