drivers/gpu/drm/i915/gt/selftest_timeline.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2017-2018 Intel Corporation
   5  */
   6
   7 #include <linux/prime_numbers.h>
   8
   9 #include "intel_context.h"
  10 #include "intel_engine_heartbeat.h"
  11 #include "intel_engine_pm.h"
  12 #include "intel_gt.h"
  13 #include "intel_gt_requests.h"
  14 #include "intel_ring.h"
  15 #include "selftest_engine_heartbeat.h"
  16
  17 #include "../selftests/i915_random.h"
  18 #include "../i915_selftest.h"
  19
  20 #include "../selftests/igt_flush_test.h"
  21 #include "../selftests/mock_gem_device.h"
  22 #include "selftests/mock_timeline.h"
  23
  24 static struct page *hwsp_page(struct intel_timeline *tl)
  25 {
  26         struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
  27
  28         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
  29         return sg_page(obj->mm.pages->sgl);
  30 }
  31
  32 static unsigned long hwsp_cacheline(struct intel_timeline *tl)
  33 {
  34         unsigned long address = (unsigned long)page_address(hwsp_page(tl));
  35
  36         return (address + tl->hwsp_offset) / CACHELINE_BYTES;
  37 }
  38
  39 #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
  40
  41 struct mock_hwsp_freelist {
  42         struct intel_gt *gt;
  43         struct radix_tree_root cachelines;
  44         struct intel_timeline **history;
  45         unsigned long count, max;
  46         struct rnd_state prng;
  47 };
  48
  49 enum {
  50         SHUFFLE = BIT(0),
  51 };
  52
  53 static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
  54                                unsigned int idx,
  55                                struct intel_timeline *tl)
  56 {
  57         tl = xchg(&state->history[idx], tl);
  58         if (tl) {
  59                 radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
  60                 intel_timeline_put(tl);
  61         }
  62 }
  63
  64 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
  65                                 unsigned int count,
  66                                 unsigned int flags)
  67 {
  68         struct intel_timeline *tl;
  69         unsigned int idx;
  70
  71         while (count--) {
  72                 unsigned long cacheline;
  73                 int err;
  74
  75                 tl = intel_timeline_create(state->gt, NULL);
  76                 if (IS_ERR(tl))
  77                         return PTR_ERR(tl);
  78
  79                 cacheline = hwsp_cacheline(tl);
  80                 err = radix_tree_insert(&state->cachelines, cacheline, tl);
  81                 if (err) {
  82                         if (err == -EEXIST) {
  83                                 pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
  84                                        cacheline);
  85                         }
  86                         intel_timeline_put(tl);
  87                         return err;
  88                 }
  89
  90                 idx = state->count++ % state->max;
  91                 __mock_hwsp_record(state, idx, tl);
  92         }
  93
  94         if (flags & SHUFFLE)
  95                 i915_prandom_shuffle(state->history,
  96                                      sizeof(*state->history),
  97                                      min(state->count, state->max),
  98                                      &state->prng);
  99
 100         count = i915_prandom_u32_max_state(min(state->count, state->max),
 101                                            &state->prng);
 102         while (count--) {
 103                 idx = --state->count % state->max;
 104                 __mock_hwsp_record(state, idx, NULL);
 105         }
 106
 107         return 0;
 108 }
 109
 110 static int mock_hwsp_freelist(void *arg)
 111 {
 112         struct mock_hwsp_freelist state;
 113         struct drm_i915_private *i915;
 114         const struct {
 115                 const char *name;
 116                 unsigned int flags;
 117         } phases[] = {
 118                 { "linear", 0 },
 119                 { "shuffled", SHUFFLE },
 120                 { },
 121         }, *p;
 122         unsigned int na;
 123         int err = 0;
 124
 125         i915 = mock_gem_device();
 126         if (!i915)
 127                 return -ENOMEM;
 128
 129         INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
 130         state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
 131
 132         state.gt = &i915->gt;
 133
 134         /*
 135          * Create a bunch of timelines and check that their HWSP do not overlap.
 136          * Free some, and try again.
 137          */
 138
 139         state.max = PAGE_SIZE / sizeof(*state.history);
 140         state.count = 0;
 141         state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
 142         if (!state.history) {
 143                 err = -ENOMEM;
 144                 goto err_put;
 145         }
 146
 147         for (p = phases; p->name; p++) {
 148                 pr_debug("%s(%s)\n", __func__, p->name);
 149                 for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
 150                         err = __mock_hwsp_timeline(&state, na, p->flags);
 151                         if (err)
 152                                 goto out;
 153                 }
 154         }
 155
 156 out:
 157         for (na = 0; na < state.max; na++)
 158                 __mock_hwsp_record(&state, na, NULL);
 159         kfree(state.history);
 160 err_put:
 161         drm_dev_put(&i915->drm);
 162         return err;
 163 }
 164
 165 struct __igt_sync {
 166         const char *name;
 167         u32 seqno;
 168         bool expected;
 169         bool set;
 170 };
 171
 172 static int __igt_sync(struct intel_timeline *tl,
 173                       u64 ctx,
 174                       const struct __igt_sync *p,
 175                       const char *name)
 176 {
 177         int ret;
 178
 179         if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
 180                 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
 181                        name, p->name, ctx, p->seqno, yesno(p->expected));
 182                 return -EINVAL;
 183         }
 184
 185         if (p->set) {
 186                 ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
 187                 if (ret)
 188                         return ret;
 189         }
 190
 191         return 0;
 192 }
 193
 194 static int igt_sync(void *arg)
 195 {
 196         const struct __igt_sync pass[] = {
 197                 { "unset", 0, false, false },
 198                 { "new", 0, false, true },
 199                 { "0a", 0, true, true },
 200                 { "1a", 1, false, true },
 201                 { "1b", 1, true, true },
 202                 { "0b", 0, true, false },
 203                 { "2a", 2, false, true },
 204                 { "4", 4, false, true },
 205                 { "INT_MAX", INT_MAX, false, true },
 206                 { "INT_MAX-1", INT_MAX-1, true, false },
 207                 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
 208                 { "INT_MAX", INT_MAX, true, false },
 209                 { "UINT_MAX", UINT_MAX, false, true },
 210                 { "wrap", 0, false, true },
 211                 { "unwrap", UINT_MAX, true, false },
 212                 {},
 213         }, *p;
 214         struct intel_timeline tl;
 215         int order, offset;
 216         int ret = -ENODEV;
 217
 218         mock_timeline_init(&tl, 0);
 219         for (p = pass; p->name; p++) {
 220                 for (order = 1; order < 64; order++) {
 221                         for (offset = -1; offset <= (order > 1); offset++) {
 222                                 u64 ctx = BIT_ULL(order) + offset;
 223
 224                                 ret = __igt_sync(&tl, ctx, p, "1");
 225                                 if (ret)
 226                                         goto out;
 227                         }
 228                 }
 229         }
 230         mock_timeline_fini(&tl);
 231
 232         mock_timeline_init(&tl, 0);
 233         for (order = 1; order < 64; order++) {
 234                 for (offset = -1; offset <= (order > 1); offset++) {
 235                         u64 ctx = BIT_ULL(order) + offset;
 236
 237                         for (p = pass; p->name; p++) {
 238                                 ret = __igt_sync(&tl, ctx, p, "2");
 239                                 if (ret)
 240                                         goto out;
 241                         }
 242                 }
 243         }
 244
 245 out:
 246         mock_timeline_fini(&tl);
 247         return ret;
 248 }
 249
 250 static unsigned int random_engine(struct rnd_state *rnd)
 251 {
 252         return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
 253 }
 254
 255 static int bench_sync(void *arg)
 256 {
 257         struct rnd_state prng;
 258         struct intel_timeline tl;
 259         unsigned long end_time, count;
 260         u64 prng32_1M;
 261         ktime_t kt;
 262         int order, last_order;
 263
 264         mock_timeline_init(&tl, 0);
 265
 266         /* Lookups from cache are very fast and so the random number generation
 267          * and the loop itself becomes a significant factor in the per-iteration
 268          * timings. We try to compensate the results by measuring the overhead
 269          * of the prng and subtract it from the reported results.
 270          */
 271         prandom_seed_state(&prng, i915_selftest.random_seed);
 272         count = 0;
 273         kt = ktime_get();
 274         end_time = jiffies + HZ/10;
 275         do {
 276                 u32 x;
 277
 278                 /* Make sure the compiler doesn't optimise away the prng call */
 279                 WRITE_ONCE(x, prandom_u32_state(&prng));
 280
 281                 count++;
 282         } while (!time_after(jiffies, end_time));
 283         kt = ktime_sub(ktime_get(), kt);
 284         pr_debug("%s: %lu random evaluations, %lluns/prng\n",
 285                  __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 286         prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
 287
 288         /* Benchmark (only) setting random context ids */
 289         prandom_seed_state(&prng, i915_selftest.random_seed);
 290         count = 0;
 291         kt = ktime_get();
 292         end_time = jiffies + HZ/10;
 293         do {
 294                 u64 id = i915_prandom_u64_state(&prng);
 295
 296                 __intel_timeline_sync_set(&tl, id, 0);
 297                 count++;
 298         } while (!time_after(jiffies, end_time));
 299         kt = ktime_sub(ktime_get(), kt);
 300         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 301         pr_info("%s: %lu random insertions, %lluns/insert\n",
 302                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 303
 304         /* Benchmark looking up the exact same context ids as we just set */
 305         prandom_seed_state(&prng, i915_selftest.random_seed);
 306         end_time = count;
 307         kt = ktime_get();
 308         while (end_time--) {
 309                 u64 id = i915_prandom_u64_state(&prng);
 310
 311                 if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
 312                         mock_timeline_fini(&tl);
 313                         pr_err("Lookup of %llu failed\n", id);
 314                         return -EINVAL;
 315                 }
 316         }
 317         kt = ktime_sub(ktime_get(), kt);
 318         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 319         pr_info("%s: %lu random lookups, %lluns/lookup\n",
 320                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 321
 322         mock_timeline_fini(&tl);
 323         cond_resched();
 324
 325         mock_timeline_init(&tl, 0);
 326
 327         /* Benchmark setting the first N (in order) contexts */
 328         count = 0;
 329         kt = ktime_get();
 330         end_time = jiffies + HZ/10;
 331         do {
 332                 __intel_timeline_sync_set(&tl, count++, 0);
 333         } while (!time_after(jiffies, end_time));
 334         kt = ktime_sub(ktime_get(), kt);
 335         pr_info("%s: %lu in-order insertions, %lluns/insert\n",
 336                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 337
 338         /* Benchmark looking up the exact same context ids as we just set */
 339         end_time = count;
 340         kt = ktime_get();
 341         while (end_time--) {
 342                 if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
 343                         pr_err("Lookup of %lu failed\n", end_time);
 344                         mock_timeline_fini(&tl);
 345                         return -EINVAL;
 346                 }
 347         }
 348         kt = ktime_sub(ktime_get(), kt);
 349         pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
 350                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 351
 352         mock_timeline_fini(&tl);
 353         cond_resched();
 354
 355         mock_timeline_init(&tl, 0);
 356
 357         /* Benchmark searching for a random context id and maybe changing it */
 358         prandom_seed_state(&prng, i915_selftest.random_seed);
 359         count = 0;
 360         kt = ktime_get();
 361         end_time = jiffies + HZ/10;
 362         do {
 363                 u32 id = random_engine(&prng);
 364                 u32 seqno = prandom_u32_state(&prng);
 365
 366                 if (!__intel_timeline_sync_is_later(&tl, id, seqno))
 367                         __intel_timeline_sync_set(&tl, id, seqno);
 368
 369                 count++;
 370         } while (!time_after(jiffies, end_time));
 371         kt = ktime_sub(ktime_get(), kt);
 372         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 373         pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
 374                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 375         mock_timeline_fini(&tl);
 376         cond_resched();
 377
 378         /* Benchmark searching for a known context id and changing the seqno */
 379         for (last_order = 1, order = 1; order < 32;
 380              ({ int tmp = last_order; last_order = order; order += tmp; })) {
 381                 unsigned int mask = BIT(order) - 1;
 382
 383                 mock_timeline_init(&tl, 0);
 384
 385                 count = 0;
 386                 kt = ktime_get();
 387                 end_time = jiffies + HZ/10;
 388                 do {
 389                         /* Without assuming too many details of the underlying
 390                          * implementation, try to identify its phase-changes
 391                          * (if any)!
 392                          */
 393                         u64 id = (u64)(count & mask) << order;
 394
 395                         __intel_timeline_sync_is_later(&tl, id, 0);
 396                         __intel_timeline_sync_set(&tl, id, 0);
 397
 398                         count++;
 399                 } while (!time_after(jiffies, end_time));
 400                 kt = ktime_sub(ktime_get(), kt);
 401                 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
 402                         __func__, count, order,
 403                         (long long)div64_ul(ktime_to_ns(kt), count));
 404                 mock_timeline_fini(&tl);
 405                 cond_resched();
 406         }
 407
 408         return 0;
 409 }
 410
 411 int intel_timeline_mock_selftests(void)
 412 {
 413         static const struct i915_subtest tests[] = {
 414                 SUBTEST(mock_hwsp_freelist),
 415                 SUBTEST(igt_sync),
 416                 SUBTEST(bench_sync),
 417         };
 418
 419         return i915_subtests(tests, NULL);
 420 }
 421
 422 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
 423 {
 424         u32 *cs;
 425
 426         cs = intel_ring_begin(rq, 4);
 427         if (IS_ERR(cs))
 428                 return PTR_ERR(cs);
 429
 430         if (INTEL_GEN(rq->engine->i915) >= 8) {
 431                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 432                 *cs++ = addr;
 433                 *cs++ = 0;
 434                 *cs++ = value;
 435         } else if (INTEL_GEN(rq->engine->i915) >= 4) {
 436                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 437                 *cs++ = 0;
 438                 *cs++ = addr;
 439                 *cs++ = value;
 440         } else {
 441                 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 442                 *cs++ = addr;
 443                 *cs++ = value;
 444                 *cs++ = MI_NOOP;
 445         }
 446
 447         intel_ring_advance(rq, cs);
 448
 449         return 0;
 450 }
 451
 452 static struct i915_request *
 453 tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 454 {
 455         struct i915_request *rq;
 456         int err;
 457
 458         err = intel_timeline_pin(tl);
 459         if (err) {
 460                 rq = ERR_PTR(err);
 461                 goto out;
 462         }
 463
 464         rq = intel_engine_create_kernel_request(engine);
 465         if (IS_ERR(rq))
 466                 goto out_unpin;
 467
 468         i915_request_get(rq);
 469
 470         err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 471         i915_request_add(rq);
 472         if (err) {
 473                 i915_request_put(rq);
 474                 rq = ERR_PTR(err);
 475         }
 476
 477 out_unpin:
 478         intel_timeline_unpin(tl);
 479 out:
 480         if (IS_ERR(rq))
 481                 pr_err("Failed to write to timeline!\n");
 482         return rq;
 483 }
 484
 485 static struct intel_timeline *
 486 checked_intel_timeline_create(struct intel_gt *gt)
 487 {
 488         struct intel_timeline *tl;
 489
 490         tl = intel_timeline_create(gt, NULL);
 491         if (IS_ERR(tl))
 492                 return tl;
 493
 494         if (*tl->hwsp_seqno != tl->seqno) {
 495                 pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
 496                        *tl->hwsp_seqno, tl->seqno);
 497                 intel_timeline_put(tl);
 498                 return ERR_PTR(-EINVAL);
 499         }
 500
 501         return tl;
 502 }
 503
 504 static int live_hwsp_engine(void *arg)
 505 {
 506 #define NUM_TIMELINES 4096
 507         struct intel_gt *gt = arg;
 508         struct intel_timeline **timelines;
 509         struct intel_engine_cs *engine;
 510         enum intel_engine_id id;
 511         unsigned long count, n;
 512         int err = 0;
 513
 514         /*
 515          * Create a bunch of timelines and check we can write
 516          * independently to each of their breadcrumb slots.
 517          */
 518
 519         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
 520                                    sizeof(*timelines),
 521                                    GFP_KERNEL);
 522         if (!timelines)
 523                 return -ENOMEM;
 524
 525         count = 0;
 526         for_each_engine(engine, gt, id) {
 527                 if (!intel_engine_can_store_dword(engine))
 528                         continue;
 529
 530                 intel_engine_pm_get(engine);
 531
 532                 for (n = 0; n < NUM_TIMELINES; n++) {
 533                         struct intel_timeline *tl;
 534                         struct i915_request *rq;
 535
 536                         tl = checked_intel_timeline_create(gt);
 537                         if (IS_ERR(tl)) {
 538                                 err = PTR_ERR(tl);
 539                                 break;
 540                         }
 541
 542                         rq = tl_write(tl, engine, count);
 543                         if (IS_ERR(rq)) {
 544                                 intel_timeline_put(tl);
 545                                 err = PTR_ERR(rq);
 546                                 break;
 547                         }
 548
 549                         timelines[count++] = tl;
 550                         i915_request_put(rq);
 551                 }
 552
 553                 intel_engine_pm_put(engine);
 554                 if (err)
 555                         break;
 556         }
 557
 558         if (igt_flush_test(gt->i915))
 559                 err = -EIO;
 560
 561         for (n = 0; n < count; n++) {
 562                 struct intel_timeline *tl = timelines[n];
 563
 564                 if (!err && *tl->hwsp_seqno != n) {
 565                         pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
 566                                n, tl->hwsp_offset, *tl->hwsp_seqno);
 567                         GEM_TRACE_DUMP();
 568                         err = -EINVAL;
 569                 }
 570                 intel_timeline_put(tl);
 571         }
 572
 573         kvfree(timelines);
 574         return err;
 575 #undef NUM_TIMELINES
 576 }
 577
 578 static int live_hwsp_alternate(void *arg)
 579 {
 580 #define NUM_TIMELINES 4096
 581         struct intel_gt *gt = arg;
 582         struct intel_timeline **timelines;
 583         struct intel_engine_cs *engine;
 584         enum intel_engine_id id;
 585         unsigned long count, n;
 586         int err = 0;
 587
 588         /*
 589          * Create a bunch of timelines and check we can write
 590          * independently to each of their breadcrumb slots with adjacent
 591          * engines.
 592          */
 593
 594         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
 595                                    sizeof(*timelines),
 596                                    GFP_KERNEL);
 597         if (!timelines)
 598                 return -ENOMEM;
 599
 600         count = 0;
 601         for (n = 0; n < NUM_TIMELINES; n++) {
 602                 for_each_engine(engine, gt, id) {
 603                         struct intel_timeline *tl;
 604                         struct i915_request *rq;
 605
 606                         if (!intel_engine_can_store_dword(engine))
 607                                 continue;
 608
 609                         tl = checked_intel_timeline_create(gt);
 610                         if (IS_ERR(tl)) {
 611                                 err = PTR_ERR(tl);
 612                                 goto out;
 613                         }
 614
 615                         intel_engine_pm_get(engine);
 616                         rq = tl_write(tl, engine, count);
 617                         intel_engine_pm_put(engine);
 618                         if (IS_ERR(rq)) {
 619                                 intel_timeline_put(tl);
 620                                 err = PTR_ERR(rq);
 621                                 goto out;
 622                         }
 623
 624                         timelines[count++] = tl;
 625                         i915_request_put(rq);
 626                 }
 627         }
 628
 629 out:
 630         if (igt_flush_test(gt->i915))
 631                 err = -EIO;
 632
 633         for (n = 0; n < count; n++) {
 634                 struct intel_timeline *tl = timelines[n];
 635
 636                 if (!err && *tl->hwsp_seqno != n) {
 637                         pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n",
 638                                n, tl->hwsp_offset, *tl->hwsp_seqno);
 639                         GEM_TRACE_DUMP();
 640                         err = -EINVAL;
 641                 }
 642                 intel_timeline_put(tl);
 643         }
 644
 645         kvfree(timelines);
 646         return err;
 647 #undef NUM_TIMELINES
 648 }
 649
 650 static int live_hwsp_wrap(void *arg)
 651 {
 652         struct intel_gt *gt = arg;
 653         struct intel_engine_cs *engine;
 654         struct intel_timeline *tl;
 655         enum intel_engine_id id;
 656         int err = 0;
 657
 658         /*
 659          * Across a seqno wrap, we need to keep the old cacheline alive for
 660          * foreign GPU references.
 661          */
 662
 663         tl = intel_timeline_create(gt, NULL);
 664         if (IS_ERR(tl))
 665                 return PTR_ERR(tl);
 666
 667         if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 668                 goto out_free;
 669
 670         err = intel_timeline_pin(tl);
 671         if (err)
 672                 goto out_free;
 673
 674         for_each_engine(engine, gt, id) {
 675                 const u32 *hwsp_seqno[2];
 676                 struct i915_request *rq;
 677                 u32 seqno[2];
 678
 679                 if (!intel_engine_can_store_dword(engine))
 680                         continue;
 681
 682                 rq = intel_engine_create_kernel_request(engine);
 683                 if (IS_ERR(rq)) {
 684                         err = PTR_ERR(rq);
 685                         goto out;
 686                 }
 687
 688                 tl->seqno = -4u;
 689
 690                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 691                 err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
 692                 mutex_unlock(&tl->mutex);
 693                 if (err) {
 694                         i915_request_add(rq);
 695                         goto out;
 696                 }
 697                 pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
 698                          seqno[0], tl->hwsp_offset);
 699
 700                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
 701                 if (err) {
 702                         i915_request_add(rq);
 703                         goto out;
 704                 }
 705                 hwsp_seqno[0] = tl->hwsp_seqno;
 706
 707                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 708                 err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
 709                 mutex_unlock(&tl->mutex);
 710                 if (err) {
 711                         i915_request_add(rq);
 712                         goto out;
 713                 }
 714                 pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
 715                          seqno[1], tl->hwsp_offset);
 716
 717                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
 718                 if (err) {
 719                         i915_request_add(rq);
 720                         goto out;
 721                 }
 722                 hwsp_seqno[1] = tl->hwsp_seqno;
 723
 724                 /* With wrap should come a new hwsp */
 725                 GEM_BUG_ON(seqno[1] >= seqno[0]);
 726                 GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
 727
 728                 i915_request_add(rq);
 729
 730                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 731                         pr_err("Wait for timeline writes timed out!\n");
 732                         err = -EIO;
 733                         goto out;
 734                 }
 735
 736                 if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
 737                         pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
 738                                *hwsp_seqno[0], *hwsp_seqno[1],
 739                                seqno[0], seqno[1]);
 740                         err = -EINVAL;
 741                         goto out;
 742                 }
 743
 744                 intel_gt_retire_requests(gt); /* recycle HWSP */
 745         }
 746
 747 out:
 748         if (igt_flush_test(gt->i915))
 749                 err = -EIO;
 750
 751         intel_timeline_unpin(tl);
 752 out_free:
 753         intel_timeline_put(tl);
 754         return err;
 755 }
 756
 757 static int live_hwsp_rollover_kernel(void *arg)
 758 {
 759         struct intel_gt *gt = arg;
 760         struct intel_engine_cs *engine;
 761         enum intel_engine_id id;
 762         int err = 0;
 763
 764         /*
 765          * Run the host for long enough, and even the kernel context will
 766          * see a seqno rollover.
 767          */
 768
 769         for_each_engine(engine, gt, id) {
 770                 struct intel_context *ce = engine->kernel_context;
 771                 struct intel_timeline *tl = ce->timeline;
 772                 struct i915_request *rq[3] = {};
 773                 int i;
 774
 775                 st_engine_heartbeat_disable(engine);
 776                 if (intel_gt_wait_for_idle(gt, HZ / 2)) {
 777                         err = -EIO;
 778                         goto out;
 779                 }
 780
 781                 GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
 782                 tl->seqno = 0;
 783                 timeline_rollback(tl);
 784                 timeline_rollback(tl);
 785                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
 786
 787                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 788                         struct i915_request *this;
 789
 790                         this = i915_request_create(ce);
 791                         if (IS_ERR(this)) {
 792                                 err = PTR_ERR(this);
 793                                 goto out;
 794                         }
 795
 796                         pr_debug("%s: create fence.seqnp:%d\n",
 797                                  engine->name,
 798                                  lower_32_bits(this->fence.seqno));
 799
 800                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
 801
 802                         rq[i] = i915_request_get(this);
 803                         i915_request_add(this);
 804                 }
 805
 806                 /* We expected a wrap! */
 807                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
 808
 809                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
 810                         pr_err("Wait for timeline wrap timed out!\n");
 811                         err = -EIO;
 812                         goto out;
 813                 }
 814
 815                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 816                         if (!i915_request_completed(rq[i])) {
 817                                 pr_err("Pre-wrap request not completed!\n");
 818                                 err = -EINVAL;
 819                                 goto out;
 820                         }
 821                 }
 822
 823 out:
 824                 for (i = 0; i < ARRAY_SIZE(rq); i++)
 825                         i915_request_put(rq[i]);
 826                 st_engine_heartbeat_enable(engine);
 827                 if (err)
 828                         break;
 829         }
 830
 831         if (igt_flush_test(gt->i915))
 832                 err = -EIO;
 833
 834         return err;
 835 }
 836
 837 static int live_hwsp_rollover_user(void *arg)
 838 {
 839         struct intel_gt *gt = arg;
 840         struct intel_engine_cs *engine;
 841         enum intel_engine_id id;
 842         int err = 0;
 843
 844         /*
 845          * Simulate a long running user context, and force the seqno wrap
 846          * on the user's timeline.
 847          */
 848
 849         for_each_engine(engine, gt, id) {
 850                 struct i915_request *rq[3] = {};
 851                 struct intel_timeline *tl;
 852                 struct intel_context *ce;
 853                 int i;
 854
 855                 ce = intel_context_create(engine);
 856                 if (IS_ERR(ce))
 857                         return PTR_ERR(ce);
 858
 859                 err = intel_context_alloc_state(ce);
 860                 if (err)
 861                         goto out;
 862
 863                 tl = ce->timeline;
 864                 if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 865                         goto out;
 866
 867                 timeline_rollback(tl);
 868                 timeline_rollback(tl);
 869                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
 870
 871                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 872                         struct i915_request *this;
 873
 874                         this = intel_context_create_request(ce);
 875                         if (IS_ERR(this)) {
 876                                 err = PTR_ERR(this);
 877                                 goto out;
 878                         }
 879
 880                         pr_debug("%s: create fence.seqnp:%d\n",
 881                                  engine->name,
 882                                  lower_32_bits(this->fence.seqno));
 883
 884                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
 885
 886                         rq[i] = i915_request_get(this);
 887                         i915_request_add(this);
 888                 }
 889
 890                 /* We expected a wrap! */
 891                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
 892
 893                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
 894                         pr_err("Wait for timeline wrap timed out!\n");
 895                         err = -EIO;
 896                         goto out;
 897                 }
 898
 899                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 900                         if (!i915_request_completed(rq[i])) {
 901                                 pr_err("Pre-wrap request not completed!\n");
 902                                 err = -EINVAL;
 903                                 goto out;
 904                         }
 905                 }
 906
 907 out:
 908                 for (i = 0; i < ARRAY_SIZE(rq); i++)
 909                         i915_request_put(rq[i]);
 910                 intel_context_put(ce);
 911                 if (err)
 912                         break;
 913         }
 914
 915         if (igt_flush_test(gt->i915))
 916                 err = -EIO;
 917
 918         return err;
 919 }
 920
 921 static int live_hwsp_recycle(void *arg)
 922 {
 923         struct intel_gt *gt = arg;
 924         struct intel_engine_cs *engine;
 925         enum intel_engine_id id;
 926         unsigned long count;
 927         int err = 0;
 928
 929         /*
 930          * Check seqno writes into one timeline at a time. We expect to
 931          * recycle the breadcrumb slot between iterations and neither
 932          * want to confuse ourselves or the GPU.
 933          */
 934
 935         count = 0;
 936         for_each_engine(engine, gt, id) {
 937                 IGT_TIMEOUT(end_time);
 938
 939                 if (!intel_engine_can_store_dword(engine))
 940                         continue;
 941
 942                 intel_engine_pm_get(engine);
 943
 944                 do {
 945                         struct intel_timeline *tl;
 946                         struct i915_request *rq;
 947
 948                         tl = checked_intel_timeline_create(gt);
 949                         if (IS_ERR(tl)) {
 950                                 err = PTR_ERR(tl);
 951                                 break;
 952                         }
 953
 954                         rq = tl_write(tl, engine, count);
 955                         if (IS_ERR(rq)) {
 956                                 intel_timeline_put(tl);
 957                                 err = PTR_ERR(rq);
 958                                 break;
 959                         }
 960
 961                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 962                                 pr_err("Wait for timeline writes timed out!\n");
 963                                 i915_request_put(rq);
 964                                 intel_timeline_put(tl);
 965                                 err = -EIO;
 966                                 break;
 967                         }
 968
 969                         if (*tl->hwsp_seqno != count) {
 970                                 pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n",
 971                                        count, *tl->hwsp_seqno);
 972                                 GEM_TRACE_DUMP();
 973                                 err = -EINVAL;
 974                         }
 975
 976                         i915_request_put(rq);
 977                         intel_timeline_put(tl);
 978                         count++;
 979
 980                         if (err)
 981                                 break;
 982                 } while (!__igt_timeout(end_time, NULL));
 983
 984                 intel_engine_pm_put(engine);
 985                 if (err)
 986                         break;
 987         }
 988
 989         return err;
 990 }
 991
 992 int intel_timeline_live_selftests(struct drm_i915_private *i915)
 993 {
 994         static const struct i915_subtest tests[] = {
 995                 SUBTEST(live_hwsp_recycle),
 996                 SUBTEST(live_hwsp_engine),
 997                 SUBTEST(live_hwsp_alternate),
 998                 SUBTEST(live_hwsp_wrap),
 999                 SUBTEST(live_hwsp_rollover_kernel),
1000                 SUBTEST(live_hwsp_rollover_user),
1001         };
1002
1003         if (intel_gt_is_wedged(&i915->gt))
1004                 return 0;
1005
1006         return intel_gt_live_subtests(tests, &i915->gt);
1007 }