drivers/gpu/drm/i915/gt/intel_lrc.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2014 Intel Corporation
   4  */
   5
   6 #include "gem/i915_gem_lmem.h"
   7
   8 #include "gen8_engine_cs.h"
   9 #include "i915_drv.h"
  10 #include "i915_perf.h"
  11 #include "i915_reg.h"
  12 #include "intel_context.h"
  13 #include "intel_engine.h"
  14 #include "intel_engine_regs.h"
  15 #include "intel_gpu_commands.h"
  16 #include "intel_gt.h"
  17 #include "intel_gt_regs.h"
  18 #include "intel_lrc.h"
  19 #include "intel_lrc_reg.h"
  20 #include "intel_ring.h"
  21 #include "shmem_utils.h"
  22
  23 /*
  24  * The per-platform tables are u8-encoded in @data. Decode @data and set the
  25  * addresses' offset and commands in @regs. The following encoding is used
  26  * for each byte. There are 2 steps: decoding commands and decoding addresses.
  27  *
  28  * Commands:
  29  * [7]: create NOPs - number of NOPs are set in lower bits
  30  * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
  31  *      MI_LRI_FORCE_POSTED
  32  * [5:0]: Number of NOPs or registers to set values to in case of
  33  *        MI_LOAD_REGISTER_IMM
  34  *
  35  * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
  36  * number of registers. They are set by using the REG/REG16 macros: the former
  37  * is used for offsets smaller than 0x200 while the latter is for values bigger
  38  * than that. Those macros already set all the bits documented below correctly:
  39  *
  40  * [7]: When a register offset needs more than 6 bits, use additional bytes, to
  41  *      follow, for the lower bits
  42  * [6:0]: Register offset, without considering the engine base.
  43  *
  44  * This function only tweaks the commands and register offsets. Values are not
  45  * filled out.
  46  */
  47 static void set_offsets(u32 *regs,
  48                         const u8 *data,
  49                         const struct intel_engine_cs *engine,
  50                         bool close)
  51 #define NOP(x) (BIT(7) | (x))
  52 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
  53 #define POSTED BIT(0)
  54 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
  55 #define REG16(x) \
  56         (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
  57         (((x) >> 2) & 0x7f)
  58 #define END 0
  59 {
  60         const u32 base = engine->mmio_base;
  61
  62         while (*data) {
  63                 u8 count, flags;
  64
  65                 if (*data & BIT(7)) { /* skip */
  66                         count = *data++ & ~BIT(7);
  67                         regs += count;
  68                         continue;
  69                 }
  70
  71                 count = *data & 0x3f;
  72                 flags = *data >> 6;
  73                 data++;
  74
  75                 *regs = MI_LOAD_REGISTER_IMM(count);
  76                 if (flags & POSTED)
  77                         *regs |= MI_LRI_FORCE_POSTED;
  78                 if (GRAPHICS_VER(engine->i915) >= 11)
  79                         *regs |= MI_LRI_LRM_CS_MMIO;
  80                 regs++;
  81
  82                 GEM_BUG_ON(!count);
  83                 do {
  84                         u32 offset = 0;
  85                         u8 v;
  86
  87                         do {
  88                                 v = *data++;
  89                                 offset <<= 7;
  90                                 offset |= v & ~BIT(7);
  91                         } while (v & BIT(7));
  92
  93                         regs[0] = base + (offset << 2);
  94                         regs += 2;
  95                 } while (--count);
  96         }
  97
  98         if (close) {
  99                 /* Close the batch; used mainly by live_lrc_layout() */
 100                 *regs = MI_BATCH_BUFFER_END;
 101                 if (GRAPHICS_VER(engine->i915) >= 11)
 102                         *regs |= BIT(0);
 103         }
 104 }
 105
 106 static const u8 gen8_xcs_offsets[] = {
 107         NOP(1),
 108         LRI(11, 0),
 109         REG16(0x244),
 110         REG(0x034),
 111         REG(0x030),
 112         REG(0x038),
 113         REG(0x03c),
 114         REG(0x168),
 115         REG(0x140),
 116         REG(0x110),
 117         REG(0x11c),
 118         REG(0x114),
 119         REG(0x118),
 120
 121         NOP(9),
 122         LRI(9, 0),
 123         REG16(0x3a8),
 124         REG16(0x28c),
 125         REG16(0x288),
 126         REG16(0x284),
 127         REG16(0x280),
 128         REG16(0x27c),
 129         REG16(0x278),
 130         REG16(0x274),
 131         REG16(0x270),
 132
 133         NOP(13),
 134         LRI(2, 0),
 135         REG16(0x200),
 136         REG(0x028),
 137
 138         END
 139 };
 140
 141 static const u8 gen9_xcs_offsets[] = {
 142         NOP(1),
 143         LRI(14, POSTED),
 144         REG16(0x244),
 145         REG(0x034),
 146         REG(0x030),
 147         REG(0x038),
 148         REG(0x03c),
 149         REG(0x168),
 150         REG(0x140),
 151         REG(0x110),
 152         REG(0x11c),
 153         REG(0x114),
 154         REG(0x118),
 155         REG(0x1c0),
 156         REG(0x1c4),
 157         REG(0x1c8),
 158
 159         NOP(3),
 160         LRI(9, POSTED),
 161         REG16(0x3a8),
 162         REG16(0x28c),
 163         REG16(0x288),
 164         REG16(0x284),
 165         REG16(0x280),
 166         REG16(0x27c),
 167         REG16(0x278),
 168         REG16(0x274),
 169         REG16(0x270),
 170
 171         NOP(13),
 172         LRI(1, POSTED),
 173         REG16(0x200),
 174
 175         NOP(13),
 176         LRI(44, POSTED),
 177         REG(0x028),
 178         REG(0x09c),
 179         REG(0x0c0),
 180         REG(0x178),
 181         REG(0x17c),
 182         REG16(0x358),
 183         REG(0x170),
 184         REG(0x150),
 185         REG(0x154),
 186         REG(0x158),
 187         REG16(0x41c),
 188         REG16(0x600),
 189         REG16(0x604),
 190         REG16(0x608),
 191         REG16(0x60c),
 192         REG16(0x610),
 193         REG16(0x614),
 194         REG16(0x618),
 195         REG16(0x61c),
 196         REG16(0x620),
 197         REG16(0x624),
 198         REG16(0x628),
 199         REG16(0x62c),
 200         REG16(0x630),
 201         REG16(0x634),
 202         REG16(0x638),
 203         REG16(0x63c),
 204         REG16(0x640),
 205         REG16(0x644),
 206         REG16(0x648),
 207         REG16(0x64c),
 208         REG16(0x650),
 209         REG16(0x654),
 210         REG16(0x658),
 211         REG16(0x65c),
 212         REG16(0x660),
 213         REG16(0x664),
 214         REG16(0x668),
 215         REG16(0x66c),
 216         REG16(0x670),
 217         REG16(0x674),
 218         REG16(0x678),
 219         REG16(0x67c),
 220         REG(0x068),
 221
 222         END
 223 };
 224
 225 static const u8 gen12_xcs_offsets[] = {
 226         NOP(1),
 227         LRI(13, POSTED),
 228         REG16(0x244),
 229         REG(0x034),
 230         REG(0x030),
 231         REG(0x038),
 232         REG(0x03c),
 233         REG(0x168),
 234         REG(0x140),
 235         REG(0x110),
 236         REG(0x1c0),
 237         REG(0x1c4),
 238         REG(0x1c8),
 239         REG(0x180),
 240         REG16(0x2b4),
 241
 242         NOP(5),
 243         LRI(9, POSTED),
 244         REG16(0x3a8),
 245         REG16(0x28c),
 246         REG16(0x288),
 247         REG16(0x284),
 248         REG16(0x280),
 249         REG16(0x27c),
 250         REG16(0x278),
 251         REG16(0x274),
 252         REG16(0x270),
 253
 254         END
 255 };
 256
 257 static const u8 dg2_xcs_offsets[] = {
 258         NOP(1),
 259         LRI(15, POSTED),
 260         REG16(0x244),
 261         REG(0x034),
 262         REG(0x030),
 263         REG(0x038),
 264         REG(0x03c),
 265         REG(0x168),
 266         REG(0x140),
 267         REG(0x110),
 268         REG(0x1c0),
 269         REG(0x1c4),
 270         REG(0x1c8),
 271         REG(0x180),
 272         REG16(0x2b4),
 273         REG(0x120),
 274         REG(0x124),
 275
 276         NOP(1),
 277         LRI(9, POSTED),
 278         REG16(0x3a8),
 279         REG16(0x28c),
 280         REG16(0x288),
 281         REG16(0x284),
 282         REG16(0x280),
 283         REG16(0x27c),
 284         REG16(0x278),
 285         REG16(0x274),
 286         REG16(0x270),
 287
 288         END
 289 };
 290
 291 static const u8 gen8_rcs_offsets[] = {
 292         NOP(1),
 293         LRI(14, POSTED),
 294         REG16(0x244),
 295         REG(0x034),
 296         REG(0x030),
 297         REG(0x038),
 298         REG(0x03c),
 299         REG(0x168),
 300         REG(0x140),
 301         REG(0x110),
 302         REG(0x11c),
 303         REG(0x114),
 304         REG(0x118),
 305         REG(0x1c0),
 306         REG(0x1c4),
 307         REG(0x1c8),
 308
 309         NOP(3),
 310         LRI(9, POSTED),
 311         REG16(0x3a8),
 312         REG16(0x28c),
 313         REG16(0x288),
 314         REG16(0x284),
 315         REG16(0x280),
 316         REG16(0x27c),
 317         REG16(0x278),
 318         REG16(0x274),
 319         REG16(0x270),
 320
 321         NOP(13),
 322         LRI(1, 0),
 323         REG(0x0c8),
 324
 325         END
 326 };
 327
 328 static const u8 gen9_rcs_offsets[] = {
 329         NOP(1),
 330         LRI(14, POSTED),
 331         REG16(0x244),
 332         REG(0x34),
 333         REG(0x30),
 334         REG(0x38),
 335         REG(0x3c),
 336         REG(0x168),
 337         REG(0x140),
 338         REG(0x110),
 339         REG(0x11c),
 340         REG(0x114),
 341         REG(0x118),
 342         REG(0x1c0),
 343         REG(0x1c4),
 344         REG(0x1c8),
 345
 346         NOP(3),
 347         LRI(9, POSTED),
 348         REG16(0x3a8),
 349         REG16(0x28c),
 350         REG16(0x288),
 351         REG16(0x284),
 352         REG16(0x280),
 353         REG16(0x27c),
 354         REG16(0x278),
 355         REG16(0x274),
 356         REG16(0x270),
 357
 358         NOP(13),
 359         LRI(1, 0),
 360         REG(0xc8),
 361
 362         NOP(13),
 363         LRI(44, POSTED),
 364         REG(0x28),
 365         REG(0x9c),
 366         REG(0xc0),
 367         REG(0x178),
 368         REG(0x17c),
 369         REG16(0x358),
 370         REG(0x170),
 371         REG(0x150),
 372         REG(0x154),
 373         REG(0x158),
 374         REG16(0x41c),
 375         REG16(0x600),
 376         REG16(0x604),
 377         REG16(0x608),
 378         REG16(0x60c),
 379         REG16(0x610),
 380         REG16(0x614),
 381         REG16(0x618),
 382         REG16(0x61c),
 383         REG16(0x620),
 384         REG16(0x624),
 385         REG16(0x628),
 386         REG16(0x62c),
 387         REG16(0x630),
 388         REG16(0x634),
 389         REG16(0x638),
 390         REG16(0x63c),
 391         REG16(0x640),
 392         REG16(0x644),
 393         REG16(0x648),
 394         REG16(0x64c),
 395         REG16(0x650),
 396         REG16(0x654),
 397         REG16(0x658),
 398         REG16(0x65c),
 399         REG16(0x660),
 400         REG16(0x664),
 401         REG16(0x668),
 402         REG16(0x66c),
 403         REG16(0x670),
 404         REG16(0x674),
 405         REG16(0x678),
 406         REG16(0x67c),
 407         REG(0x68),
 408
 409         END
 410 };
 411
 412 static const u8 gen11_rcs_offsets[] = {
 413         NOP(1),
 414         LRI(15, POSTED),
 415         REG16(0x244),
 416         REG(0x034),
 417         REG(0x030),
 418         REG(0x038),
 419         REG(0x03c),
 420         REG(0x168),
 421         REG(0x140),
 422         REG(0x110),
 423         REG(0x11c),
 424         REG(0x114),
 425         REG(0x118),
 426         REG(0x1c0),
 427         REG(0x1c4),
 428         REG(0x1c8),
 429         REG(0x180),
 430
 431         NOP(1),
 432         LRI(9, POSTED),
 433         REG16(0x3a8),
 434         REG16(0x28c),
 435         REG16(0x288),
 436         REG16(0x284),
 437         REG16(0x280),
 438         REG16(0x27c),
 439         REG16(0x278),
 440         REG16(0x274),
 441         REG16(0x270),
 442
 443         LRI(1, POSTED),
 444         REG(0x1b0),
 445
 446         NOP(10),
 447         LRI(1, 0),
 448         REG(0x0c8),
 449
 450         END
 451 };
 452
 453 static const u8 gen12_rcs_offsets[] = {
 454         NOP(1),
 455         LRI(13, POSTED),
 456         REG16(0x244),
 457         REG(0x034),
 458         REG(0x030),
 459         REG(0x038),
 460         REG(0x03c),
 461         REG(0x168),
 462         REG(0x140),
 463         REG(0x110),
 464         REG(0x1c0),
 465         REG(0x1c4),
 466         REG(0x1c8),
 467         REG(0x180),
 468         REG16(0x2b4),
 469
 470         NOP(5),
 471         LRI(9, POSTED),
 472         REG16(0x3a8),
 473         REG16(0x28c),
 474         REG16(0x288),
 475         REG16(0x284),
 476         REG16(0x280),
 477         REG16(0x27c),
 478         REG16(0x278),
 479         REG16(0x274),
 480         REG16(0x270),
 481
 482         LRI(3, POSTED),
 483         REG(0x1b0),
 484         REG16(0x5a8),
 485         REG16(0x5ac),
 486
 487         NOP(6),
 488         LRI(1, 0),
 489         REG(0x0c8),
 490         NOP(3 + 9 + 1),
 491
 492         LRI(51, POSTED),
 493         REG16(0x588),
 494         REG16(0x588),
 495         REG16(0x588),
 496         REG16(0x588),
 497         REG16(0x588),
 498         REG16(0x588),
 499         REG(0x028),
 500         REG(0x09c),
 501         REG(0x0c0),
 502         REG(0x178),
 503         REG(0x17c),
 504         REG16(0x358),
 505         REG(0x170),
 506         REG(0x150),
 507         REG(0x154),
 508         REG(0x158),
 509         REG16(0x41c),
 510         REG16(0x600),
 511         REG16(0x604),
 512         REG16(0x608),
 513         REG16(0x60c),
 514         REG16(0x610),
 515         REG16(0x614),
 516         REG16(0x618),
 517         REG16(0x61c),
 518         REG16(0x620),
 519         REG16(0x624),
 520         REG16(0x628),
 521         REG16(0x62c),
 522         REG16(0x630),
 523         REG16(0x634),
 524         REG16(0x638),
 525         REG16(0x63c),
 526         REG16(0x640),
 527         REG16(0x644),
 528         REG16(0x648),
 529         REG16(0x64c),
 530         REG16(0x650),
 531         REG16(0x654),
 532         REG16(0x658),
 533         REG16(0x65c),
 534         REG16(0x660),
 535         REG16(0x664),
 536         REG16(0x668),
 537         REG16(0x66c),
 538         REG16(0x670),
 539         REG16(0x674),
 540         REG16(0x678),
 541         REG16(0x67c),
 542         REG(0x068),
 543         REG(0x084),
 544         NOP(1),
 545
 546         END
 547 };
 548
 549 static const u8 xehp_rcs_offsets[] = {
 550         NOP(1),
 551         LRI(13, POSTED),
 552         REG16(0x244),
 553         REG(0x034),
 554         REG(0x030),
 555         REG(0x038),
 556         REG(0x03c),
 557         REG(0x168),
 558         REG(0x140),
 559         REG(0x110),
 560         REG(0x1c0),
 561         REG(0x1c4),
 562         REG(0x1c8),
 563         REG(0x180),
 564         REG16(0x2b4),
 565
 566         NOP(5),
 567         LRI(9, POSTED),
 568         REG16(0x3a8),
 569         REG16(0x28c),
 570         REG16(0x288),
 571         REG16(0x284),
 572         REG16(0x280),
 573         REG16(0x27c),
 574         REG16(0x278),
 575         REG16(0x274),
 576         REG16(0x270),
 577
 578         LRI(3, POSTED),
 579         REG(0x1b0),
 580         REG16(0x5a8),
 581         REG16(0x5ac),
 582
 583         NOP(6),
 584         LRI(1, 0),
 585         REG(0x0c8),
 586
 587         END
 588 };
 589
 590 static const u8 dg2_rcs_offsets[] = {
 591         NOP(1),
 592         LRI(15, POSTED),
 593         REG16(0x244),
 594         REG(0x034),
 595         REG(0x030),
 596         REG(0x038),
 597         REG(0x03c),
 598         REG(0x168),
 599         REG(0x140),
 600         REG(0x110),
 601         REG(0x1c0),
 602         REG(0x1c4),
 603         REG(0x1c8),
 604         REG(0x180),
 605         REG16(0x2b4),
 606         REG(0x120),
 607         REG(0x124),
 608
 609         NOP(1),
 610         LRI(9, POSTED),
 611         REG16(0x3a8),
 612         REG16(0x28c),
 613         REG16(0x288),
 614         REG16(0x284),
 615         REG16(0x280),
 616         REG16(0x27c),
 617         REG16(0x278),
 618         REG16(0x274),
 619         REG16(0x270),
 620
 621         LRI(3, POSTED),
 622         REG(0x1b0),
 623         REG16(0x5a8),
 624         REG16(0x5ac),
 625
 626         NOP(6),
 627         LRI(1, 0),
 628         REG(0x0c8),
 629
 630         END
 631 };
 632
 633 static const u8 mtl_rcs_offsets[] = {
 634         NOP(1),
 635         LRI(15, POSTED),
 636         REG16(0x244),
 637         REG(0x034),
 638         REG(0x030),
 639         REG(0x038),
 640         REG(0x03c),
 641         REG(0x168),
 642         REG(0x140),
 643         REG(0x110),
 644         REG(0x1c0),
 645         REG(0x1c4),
 646         REG(0x1c8),
 647         REG(0x180),
 648         REG16(0x2b4),
 649         REG(0x120),
 650         REG(0x124),
 651
 652         NOP(1),
 653         LRI(9, POSTED),
 654         REG16(0x3a8),
 655         REG16(0x28c),
 656         REG16(0x288),
 657         REG16(0x284),
 658         REG16(0x280),
 659         REG16(0x27c),
 660         REG16(0x278),
 661         REG16(0x274),
 662         REG16(0x270),
 663
 664         NOP(2),
 665         LRI(2, POSTED),
 666         REG16(0x5a8),
 667         REG16(0x5ac),
 668
 669         NOP(6),
 670         LRI(1, 0),
 671         REG(0x0c8),
 672
 673         END
 674 };
 675
 676 #undef END
 677 #undef REG16
 678 #undef REG
 679 #undef LRI
 680 #undef NOP
 681
 682 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
 683 {
 684         /*
 685          * The gen12+ lists only have the registers we program in the basic
 686          * default state. We rely on the context image using relative
 687          * addressing to automatic fixup the register state between the
 688          * physical engines for virtual engine.
 689          */
 690         GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
 691                    !intel_engine_has_relative_mmio(engine));
 692
 693         if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
 694                 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
 695                         return mtl_rcs_offsets;
 696                 else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
 697                         return dg2_rcs_offsets;
 698                 else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
 699                         return xehp_rcs_offsets;
 700                 else if (GRAPHICS_VER(engine->i915) >= 12)
 701                         return gen12_rcs_offsets;
 702                 else if (GRAPHICS_VER(engine->i915) >= 11)
 703                         return gen11_rcs_offsets;
 704                 else if (GRAPHICS_VER(engine->i915) >= 9)
 705                         return gen9_rcs_offsets;
 706                 else
 707                         return gen8_rcs_offsets;
 708         } else {
 709                 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
 710                         return dg2_xcs_offsets;
 711                 else if (GRAPHICS_VER(engine->i915) >= 12)
 712                         return gen12_xcs_offsets;
 713                 else if (GRAPHICS_VER(engine->i915) >= 9)
 714                         return gen9_xcs_offsets;
 715                 else
 716                         return gen8_xcs_offsets;
 717         }
 718 }
 719
 720 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
 721 {
 722         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
 723                 return 0x70;
 724         else if (GRAPHICS_VER(engine->i915) >= 12)
 725                 return 0x60;
 726         else if (GRAPHICS_VER(engine->i915) >= 9)
 727                 return 0x54;
 728         else if (engine->class == RENDER_CLASS)
 729                 return 0x58;
 730         else
 731                 return -1;
 732 }
 733
 734 static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
 735 {
 736         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
 737                 return 0x80;
 738         else if (GRAPHICS_VER(engine->i915) >= 12)
 739                 return 0x70;
 740         else if (GRAPHICS_VER(engine->i915) >= 9)
 741                 return 0x64;
 742         else if (GRAPHICS_VER(engine->i915) >= 8 &&
 743                  engine->class == RENDER_CLASS)
 744                 return 0xc4;
 745         else
 746                 return -1;
 747 }
 748
 749 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
 750 {
 751         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
 752                 return 0x84;
 753         else if (GRAPHICS_VER(engine->i915) >= 12)
 754                 return 0x74;
 755         else if (GRAPHICS_VER(engine->i915) >= 9)
 756                 return 0x68;
 757         else if (engine->class == RENDER_CLASS)
 758                 return 0xd8;
 759         else
 760                 return -1;
 761 }
 762
 763 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
 764 {
 765         if (GRAPHICS_VER(engine->i915) >= 12)
 766                 return 0x12;
 767         else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
 768                 return 0x18;
 769         else
 770                 return -1;
 771 }
 772
 773 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
 774 {
 775         int x;
 776
 777         x = lrc_ring_wa_bb_per_ctx(engine);
 778         if (x < 0)
 779                 return x;
 780
 781         return x + 2;
 782 }
 783
 784 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
 785 {
 786         int x;
 787
 788         x = lrc_ring_indirect_ptr(engine);
 789         if (x < 0)
 790                 return x;
 791
 792         return x + 2;
 793 }
 794
 795 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
 796 {
 797
 798         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
 799                 /*
 800                  * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
 801                  * simply to match the RCS context image layout.
 802                  */
 803                 return 0xc6;
 804         else if (engine->class != RENDER_CLASS)
 805                 return -1;
 806         else if (GRAPHICS_VER(engine->i915) >= 12)
 807                 return 0xb6;
 808         else if (GRAPHICS_VER(engine->i915) >= 11)
 809                 return 0xaa;
 810         else
 811                 return -1;
 812 }
 813
 814 static u32
 815 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 816 {
 817         if (GRAPHICS_VER(engine->i915) >= 12)
 818                 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 819         else if (GRAPHICS_VER(engine->i915) >= 11)
 820                 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 821         else if (GRAPHICS_VER(engine->i915) >= 9)
 822                 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 823         else if (GRAPHICS_VER(engine->i915) >= 8)
 824                 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 825
 826         GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
 827
 828         return 0;
 829 }
 830
 831 static void
 832 lrc_setup_bb_per_ctx(u32 *regs,
 833                      const struct intel_engine_cs *engine,
 834                      u32 ctx_bb_ggtt_addr)
 835 {
 836         GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
 837         regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
 838                 ctx_bb_ggtt_addr |
 839                 PER_CTX_BB_FORCE |
 840                 PER_CTX_BB_VALID;
 841 }
 842
 843 static void
 844 lrc_setup_indirect_ctx(u32 *regs,
 845                        const struct intel_engine_cs *engine,
 846                        u32 ctx_bb_ggtt_addr,
 847                        u32 size)
 848 {
 849         GEM_BUG_ON(!size);
 850         GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
 851         GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
 852         regs[lrc_ring_indirect_ptr(engine) + 1] =
 853                 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
 854
 855         GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
 856         regs[lrc_ring_indirect_offset(engine) + 1] =
 857                 lrc_ring_indirect_offset_default(engine) << 6;
 858 }
 859
 860 static bool ctx_needs_runalone(const struct intel_context *ce)
 861 {
 862         struct i915_gem_context *gem_ctx;
 863         bool ctx_is_protected = false;
 864
 865         /*
 866          * On MTL and newer platforms, protected contexts require setting
 867          * the LRC run-alone bit or else the encryption will not happen.
 868          */
 869         if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
 870             (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
 871                 rcu_read_lock();
 872                 gem_ctx = rcu_dereference(ce->gem_context);
 873                 if (gem_ctx)
 874                         ctx_is_protected = gem_ctx->uses_protected_content;
 875                 rcu_read_unlock();
 876         }
 877
 878         return ctx_is_protected;
 879 }
 880
 881 static void init_common_regs(u32 * const regs,
 882                              const struct intel_context *ce,
 883                              const struct intel_engine_cs *engine,
 884                              bool inhibit)
 885 {
 886         u32 ctl;
 887         int loc;
 888
 889         ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
 890         ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 891         if (inhibit)
 892                 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
 893         if (GRAPHICS_VER(engine->i915) < 11)
 894                 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
 895                                            CTX_CTRL_RS_CTX_ENABLE);
 896         if (ctx_needs_runalone(ce))
 897                 ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
 898         regs[CTX_CONTEXT_CONTROL] = ctl;
 899
 900         regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
 901
 902         loc = lrc_ring_bb_offset(engine);
 903         if (loc != -1)
 904                 regs[loc + 1] = 0;
 905 }
 906
 907 static void init_wa_bb_regs(u32 * const regs,
 908                             const struct intel_engine_cs *engine)
 909 {
 910         const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
 911
 912         if (wa_ctx->per_ctx.size) {
 913                 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 914
 915                 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
 916                 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
 917                         (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
 918         }
 919
 920         if (wa_ctx->indirect_ctx.size) {
 921                 lrc_setup_indirect_ctx(regs, engine,
 922                                        i915_ggtt_offset(wa_ctx->vma) +
 923                                        wa_ctx->indirect_ctx.offset,
 924                                        wa_ctx->indirect_ctx.size);
 925         }
 926 }
 927
 928 static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
 929 {
 930         if (i915_vm_is_4lvl(&ppgtt->vm)) {
 931                 /* 64b PPGTT (48bit canonical)
 932                  * PDP0_DESCRIPTOR contains the base address to PML4 and
 933                  * other PDP Descriptors are ignored.
 934                  */
 935                 ASSIGN_CTX_PML4(ppgtt, regs);
 936         } else {
 937                 ASSIGN_CTX_PDP(ppgtt, regs, 3);
 938                 ASSIGN_CTX_PDP(ppgtt, regs, 2);
 939                 ASSIGN_CTX_PDP(ppgtt, regs, 1);
 940                 ASSIGN_CTX_PDP(ppgtt, regs, 0);
 941         }
 942 }
 943
 944 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
 945 {
 946         if (i915_is_ggtt(vm))
 947                 return i915_vm_to_ggtt(vm)->alias;
 948         else
 949                 return i915_vm_to_ppgtt(vm);
 950 }
 951
 952 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
 953 {
 954         int x;
 955
 956         x = lrc_ring_mi_mode(engine);
 957         if (x != -1) {
 958                 regs[x + 1] &= ~STOP_RING;
 959                 regs[x + 1] |= STOP_RING << 16;
 960         }
 961 }
 962
 963 static void __lrc_init_regs(u32 *regs,
 964                             const struct intel_context *ce,
 965                             const struct intel_engine_cs *engine,
 966                             bool inhibit)
 967 {
 968         /*
 969          * A context is actually a big batch buffer with several
 970          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
 971          * values we are setting here are only for the first context restore:
 972          * on a subsequent save, the GPU will recreate this batchbuffer with new
 973          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
 974          * we are not initializing here).
 975          *
 976          * Must keep consistent with virtual_update_register_offsets().
 977          */
 978
 979         if (inhibit)
 980                 memset(regs, 0, PAGE_SIZE);
 981
 982         set_offsets(regs, reg_offsets(engine), engine, inhibit);
 983
 984         init_common_regs(regs, ce, engine, inhibit);
 985         init_ppgtt_regs(regs, vm_alias(ce->vm));
 986
 987         init_wa_bb_regs(regs, engine);
 988
 989         __reset_stop_ring(regs, engine);
 990 }
 991
 992 void lrc_init_regs(const struct intel_context *ce,
 993                    const struct intel_engine_cs *engine,
 994                    bool inhibit)
 995 {
 996         __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
 997 }
 998
 999 void lrc_reset_regs(const struct intel_context *ce,
1000                     const struct intel_engine_cs *engine)
1001 {
1002         __reset_stop_ring(ce->lrc_reg_state, engine);
1003 }
1004
1005 static void
1006 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
1007 {
1008         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1009                 return;
1010
1011         vaddr += engine->context_size;
1012
1013         memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
1014 }
1015
1016 static void
1017 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
1018 {
1019         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1020                 return;
1021
1022         vaddr += engine->context_size;
1023
1024         if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
1025                 drm_err_once(&engine->i915->drm,
1026                              "%s context redzone overwritten!\n",
1027                              engine->name);
1028 }
1029
1030 static u32 context_wa_bb_offset(const struct intel_context *ce)
1031 {
1032         return PAGE_SIZE * ce->wa_bb_page;
1033 }
1034
1035 /*
1036  * per_ctx below determines which WABB section is used.
1037  * When true, the function returns the location of the
1038  * PER_CTX_BB.  When false, the function returns the
1039  * location of the INDIRECT_CTX.
1040  */
1041 static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
1042 {
1043         void *ptr;
1044
1045         GEM_BUG_ON(!ce->wa_bb_page);
1046
1047         ptr = ce->lrc_reg_state;
1048         ptr -= LRC_STATE_OFFSET; /* back to start of context image */
1049         ptr += context_wa_bb_offset(ce);
1050         ptr += per_ctx ? PAGE_SIZE : 0;
1051
1052         return ptr;
1053 }
1054
1055 void lrc_init_state(struct intel_context *ce,
1056                     struct intel_engine_cs *engine,
1057                     void *state)
1058 {
1059         bool inhibit = true;
1060
1061         set_redzone(state, engine);
1062
1063         if (engine->default_state) {
1064                 shmem_read(engine->default_state, 0,
1065                            state, engine->context_size);
1066                 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
1067                 inhibit = false;
1068         }
1069
1070         /* Clear the ppHWSP (inc. per-context counters) */
1071         memset(state, 0, PAGE_SIZE);
1072
1073         /* Clear the indirect wa and storage */
1074         if (ce->wa_bb_page)
1075                 memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
1076
1077         /*
1078          * The second page of the context object contains some registers which
1079          * must be set up prior to the first execution.
1080          */
1081         __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
1082 }
1083
1084 u32 lrc_indirect_bb(const struct intel_context *ce)
1085 {
1086         return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
1087 }
1088
1089 static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
1090 {
1091         /* If predication is active, this will be noop'ed */
1092         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
1093         *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
1094         *cs++ = 0;
1095         *cs++ = 0; /* No predication */
1096
1097         /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
1098         *cs++ = MI_BATCH_BUFFER_END | BIT(15);
1099         *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
1100
1101         /* Instructions are no longer predicated (disabled), we can proceed */
1102         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
1103         *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
1104         *cs++ = 0;
1105         *cs++ = 1; /* enable predication before the next BB */
1106
1107         *cs++ = MI_BATCH_BUFFER_END;
1108         GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
1109
1110         return cs;
1111 }
1112
1113 static struct i915_vma *
1114 __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
1115 {
1116         struct drm_i915_gem_object *obj;
1117         struct i915_vma *vma;
1118         u32 context_size;
1119
1120         context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
1121
1122         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1123                 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
1124
1125         if (GRAPHICS_VER(engine->i915) >= 12) {
1126                 ce->wa_bb_page = context_size / PAGE_SIZE;
1127                 /* INDIRECT_CTX and PER_CTX_BB need separate pages. */
1128                 context_size += PAGE_SIZE * 2;
1129         }
1130
1131         if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
1132                 ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
1133                 context_size += PARENT_SCRATCH_SIZE;
1134         }
1135
1136         obj = i915_gem_object_create_lmem(engine->i915, context_size,
1137                                           I915_BO_ALLOC_PM_VOLATILE);
1138         if (IS_ERR(obj)) {
1139                 obj = i915_gem_object_create_shmem(engine->i915, context_size);
1140                 if (IS_ERR(obj))
1141                         return ERR_CAST(obj);
1142
1143                 /*
1144                  * Wa_22016122933: For Media version 13.0, all Media GT shared
1145                  * memory needs to be mapped as WC on CPU side and UC (PAT
1146                  * index 2) on GPU side.
1147                  */
1148                 if (intel_gt_needs_wa_22016122933(engine->gt))
1149                         i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
1150         }
1151
1152         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1153         if (IS_ERR(vma)) {
1154                 i915_gem_object_put(obj);
1155                 return vma;
1156         }
1157
1158         return vma;
1159 }
1160
1161 static struct intel_timeline *
1162 pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
1163 {
1164         struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
1165
1166         return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
1167 }
1168
1169 int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
1170 {
1171         struct intel_ring *ring;
1172         struct i915_vma *vma;
1173         int err;
1174
1175         GEM_BUG_ON(ce->state);
1176
1177         vma = __lrc_alloc_state(ce, engine);
1178         if (IS_ERR(vma))
1179                 return PTR_ERR(vma);
1180
1181         ring = intel_engine_create_ring(engine, ce->ring_size);
1182         if (IS_ERR(ring)) {
1183                 err = PTR_ERR(ring);
1184                 goto err_vma;
1185         }
1186
1187         if (!page_mask_bits(ce->timeline)) {
1188                 struct intel_timeline *tl;
1189
1190                 /*
1191                  * Use the static global HWSP for the kernel context, and
1192                  * a dynamically allocated cacheline for everyone else.
1193                  */
1194                 if (unlikely(ce->timeline))
1195                         tl = pinned_timeline(ce, engine);
1196                 else
1197                         tl = intel_timeline_create(engine->gt);
1198                 if (IS_ERR(tl)) {
1199                         err = PTR_ERR(tl);
1200                         goto err_ring;
1201                 }
1202
1203                 ce->timeline = tl;
1204         }
1205
1206         ce->ring = ring;
1207         ce->state = vma;
1208
1209         return 0;
1210
1211 err_ring:
1212         intel_ring_put(ring);
1213 err_vma:
1214         i915_vma_put(vma);
1215         return err;
1216 }
1217
1218 void lrc_reset(struct intel_context *ce)
1219 {
1220         GEM_BUG_ON(!intel_context_is_pinned(ce));
1221
1222         intel_ring_reset(ce->ring, ce->ring->emit);
1223
1224         /* Scrub away the garbage */
1225         lrc_init_regs(ce, ce->engine, true);
1226         ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
1227 }
1228
1229 int
1230 lrc_pre_pin(struct intel_context *ce,
1231             struct intel_engine_cs *engine,
1232             struct i915_gem_ww_ctx *ww,
1233             void **vaddr)
1234 {
1235         GEM_BUG_ON(!ce->state);
1236         GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
1237
1238         *vaddr = i915_gem_object_pin_map(ce->state->obj,
1239                                          intel_gt_coherent_map_type(ce->engine->gt,
1240                                                                     ce->state->obj,
1241                                                                     false) |
1242                                          I915_MAP_OVERRIDE);
1243
1244         return PTR_ERR_OR_ZERO(*vaddr);
1245 }
1246
1247 int
1248 lrc_pin(struct intel_context *ce,
1249         struct intel_engine_cs *engine,
1250         void *vaddr)
1251 {
1252         ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
1253
1254         if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
1255                 lrc_init_state(ce, engine, vaddr);
1256
1257         ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
1258         return 0;
1259 }
1260
1261 void lrc_unpin(struct intel_context *ce)
1262 {
1263         if (unlikely(ce->parallel.last_rq)) {
1264                 i915_request_put(ce->parallel.last_rq);
1265                 ce->parallel.last_rq = NULL;
1266         }
1267         check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
1268                       ce->engine);
1269 }
1270
1271 void lrc_post_unpin(struct intel_context *ce)
1272 {
1273         i915_gem_object_unpin_map(ce->state->obj);
1274 }
1275
1276 void lrc_fini(struct intel_context *ce)
1277 {
1278         if (!ce->state)
1279                 return;
1280
1281         intel_ring_put(fetch_and_zero(&ce->ring));
1282         i915_vma_put(fetch_and_zero(&ce->state));
1283 }
1284
1285 void lrc_destroy(struct kref *kref)
1286 {
1287         struct intel_context *ce = container_of(kref, typeof(*ce), ref);
1288
1289         GEM_BUG_ON(!i915_active_is_idle(&ce->active));
1290         GEM_BUG_ON(intel_context_is_pinned(ce));
1291
1292         lrc_fini(ce);
1293
1294         intel_context_fini(ce);
1295         intel_context_free(ce);
1296 }
1297
1298 static u32 *
1299 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
1300 {
1301         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1302                 MI_SRM_LRM_GLOBAL_GTT |
1303                 MI_LRI_LRM_CS_MMIO;
1304         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1305         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1306                 CTX_TIMESTAMP * sizeof(u32);
1307         *cs++ = 0;
1308
1309         *cs++ = MI_LOAD_REGISTER_REG |
1310                 MI_LRR_SOURCE_CS_MMIO |
1311                 MI_LRI_LRM_CS_MMIO;
1312         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1313         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1314
1315         *cs++ = MI_LOAD_REGISTER_REG |
1316                 MI_LRR_SOURCE_CS_MMIO |
1317                 MI_LRI_LRM_CS_MMIO;
1318         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1319         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1320
1321         return cs;
1322 }
1323
1324 static u32 *
1325 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
1326 {
1327         GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
1328
1329         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1330                 MI_SRM_LRM_GLOBAL_GTT |
1331                 MI_LRI_LRM_CS_MMIO;
1332         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1333         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1334                 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
1335         *cs++ = 0;
1336
1337         return cs;
1338 }
1339
1340 static u32 *
1341 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
1342 {
1343         GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
1344
1345         *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1346                 MI_SRM_LRM_GLOBAL_GTT |
1347                 MI_LRI_LRM_CS_MMIO;
1348         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1349         *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1350                 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
1351         *cs++ = 0;
1352
1353         *cs++ = MI_LOAD_REGISTER_REG |
1354                 MI_LRR_SOURCE_CS_MMIO |
1355                 MI_LRI_LRM_CS_MMIO;
1356         *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1357         *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
1358
1359         return cs;
1360 }
1361
1362 /*
1363  * The bspec's tuning guide asks us to program a vertical watermark value of
1364  * 0x3FF.  However this register is not saved/restored properly by the
1365  * hardware, so we're required to apply the desired value via INDIRECT_CTX
1366  * batch buffer to ensure the value takes effect properly.  All other bits
1367  * in this register should remain at 0 (the hardware default).
1368  */
1369 static u32 *
1370 dg2_emit_draw_watermark_setting(u32 *cs)
1371 {
1372         *cs++ = MI_LOAD_REGISTER_IMM(1);
1373         *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
1374         *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);
1375
1376         return cs;
1377 }
1378
1379 static u32 *
1380 gen12_invalidate_state_cache(u32 *cs)
1381 {
1382         *cs++ = MI_LOAD_REGISTER_IMM(1);
1383         *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
1384         *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
1385         return cs;
1386 }
1387
1388 static u32 *
1389 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
1390 {
1391         cs = gen12_emit_timestamp_wa(ce, cs);
1392         cs = gen12_emit_cmd_buf_wa(ce, cs);
1393         cs = gen12_emit_restore_scratch(ce, cs);
1394
1395         /* Wa_16013000631:dg2 */
1396         if (IS_DG2_G11(ce->engine->i915))
1397                 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
1398
1399         cs = gen12_emit_aux_table_inv(ce->engine, cs);
1400
1401         /* Wa_18022495364 */
1402         if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
1403                 cs = gen12_invalidate_state_cache(cs);
1404
1405         /* Wa_16014892111 */
1406         if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
1407             IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
1408             IS_DG2(ce->engine->i915))
1409                 cs = dg2_emit_draw_watermark_setting(cs);
1410
1411         return cs;
1412 }
1413
1414 static u32 *
1415 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
1416 {
1417         cs = gen12_emit_timestamp_wa(ce, cs);
1418         cs = gen12_emit_restore_scratch(ce, cs);
1419
1420         /* Wa_16013000631:dg2 */
1421         if (IS_DG2_G11(ce->engine->i915))
1422                 if (ce->engine->class == COMPUTE_CLASS)
1423                         cs = gen8_emit_pipe_control(cs,
1424                                                     PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
1425                                                     0);
1426
1427         return gen12_emit_aux_table_inv(ce->engine, cs);
1428 }
1429
1430 static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
1431 {
1432         struct intel_gt *gt = ce->engine->gt;
1433         int mocs = gt->mocs.uc_index << 1;
1434
1435         /**
1436          * Wa_16018031267 / Wa_16018063123 requires that SW forces the
1437          * main copy engine arbitration into round robin mode.  We
1438          * additionally need to submit the following WABB blt command
1439          * to produce 4 subblits with each subblit generating 0 byte
1440          * write requests as WABB:
1441          *
1442          * XY_FASTCOLOR_BLT
1443          *  BG0    -> 5100000E
1444          *  BG1    -> 0000003F (Dest pitch)
1445          *  BG2    -> 00000000 (X1, Y1) = (0, 0)
1446          *  BG3    -> 00040001 (X2, Y2) = (1, 4)
1447          *  BG4    -> scratch
1448          *  BG5    -> scratch
1449          *  BG6-12 -> 00000000
1450          *  BG13   -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
1451          *  BG14   -> 00000010 (Qpitch = 4)
1452          *  BG15   -> 00000000
1453          */
1454         *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
1455         *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
1456         *cs++ = 0;
1457         *cs++ = 4 << 16 | 1;
1458         *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
1459         *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
1460         *cs++ = 0;
1461         *cs++ = 0;
1462         *cs++ = 0;
1463         *cs++ = 0;
1464         *cs++ = 0;
1465         *cs++ = 0;
1466         *cs++ = 0;
1467         *cs++ = 0x20004004;
1468         *cs++ = 0x10;
1469         *cs++ = 0;
1470
1471         return cs;
1472 }
1473
1474 static u32 *
1475 xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
1476 {
1477         /* Wa_16018031267, Wa_16018063123 */
1478         if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
1479                 cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
1480
1481         return cs;
1482 }
1483
1484 static void
1485 setup_per_ctx_bb(const struct intel_context *ce,
1486                  const struct intel_engine_cs *engine,
1487                  u32 *(*emit)(const struct intel_context *, u32 *))
1488 {
1489         /* Place PER_CTX_BB on next page after INDIRECT_CTX */
1490         u32 * const start = context_wabb(ce, true);
1491         u32 *cs;
1492
1493         cs = emit(ce, start);
1494
1495         /* PER_CTX_BB must manually terminate */
1496         *cs++ = MI_BATCH_BUFFER_END;
1497
1498         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1499         lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
1500                              lrc_indirect_bb(ce) + PAGE_SIZE);
1501 }
1502
1503 static void
1504 setup_indirect_ctx_bb(const struct intel_context *ce,
1505                       const struct intel_engine_cs *engine,
1506                       u32 *(*emit)(const struct intel_context *, u32 *))
1507 {
1508         u32 * const start = context_wabb(ce, false);
1509         u32 *cs;
1510
1511         cs = emit(ce, start);
1512         GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1513         while ((unsigned long)cs % CACHELINE_BYTES)
1514                 *cs++ = MI_NOOP;
1515
1516         GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
1517         setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
1518
1519         lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
1520                                lrc_indirect_bb(ce),
1521                                (cs - start) * sizeof(*cs));
1522 }
1523
1524 /*
1525  * The context descriptor encodes various attributes of a context,
1526  * including its GTT address and some flags. Because it's fairly
1527  * expensive to calculate, we'll just do it once and cache the result,
1528  * which remains valid until the context is unpinned.
1529  *
1530  * This is what a descriptor looks like, from LSB to MSB::
1531  *
1532  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
1533  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
1534  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
1535  *      bits 53-54:    mbz, reserved for use by hardware
1536  *      bits 55-63:    group ID, currently unused and set to 0
1537  *
1538  * Starting from Gen11, the upper dword of the descriptor has a new format:
1539  *
1540  *      bits 32-36:    reserved
1541  *      bits 37-47:    SW context ID
1542  *      bits 48:53:    engine instance
1543  *      bit 54:        mbz, reserved for use by hardware
1544  *      bits 55-60:    SW counter
1545  *      bits 61-63:    engine class
1546  *
1547  * On Xe_HP, the upper dword of the descriptor has a new format:
1548  *
1549  *      bits 32-37:    virtual function number
1550  *      bit 38:        mbz, reserved for use by hardware
1551  *      bits 39-54:    SW context ID
1552  *      bits 55-57:    reserved
1553  *      bits 58-63:    SW counter
1554  *
1555  * engine info, SW context ID and SW counter need to form a unique number
1556  * (Context ID) per lrc.
1557  */
1558 static u32 lrc_descriptor(const struct intel_context *ce)
1559 {
1560         u32 desc;
1561
1562         desc = INTEL_LEGACY_32B_CONTEXT;
1563         if (i915_vm_is_4lvl(ce->vm))
1564                 desc = INTEL_LEGACY_64B_CONTEXT;
1565         desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
1566
1567         desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
1568         if (GRAPHICS_VER(ce->vm->i915) == 8)
1569                 desc |= GEN8_CTX_L3LLC_COHERENT;
1570
1571         return i915_ggtt_offset(ce->state) | desc;
1572 }
1573
1574 u32 lrc_update_regs(const struct intel_context *ce,
1575                     const struct intel_engine_cs *engine,
1576                     u32 head)
1577 {
1578         struct intel_ring *ring = ce->ring;
1579         u32 *regs = ce->lrc_reg_state;
1580
1581         GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
1582         GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
1583
1584         regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1585         regs[CTX_RING_HEAD] = head;
1586         regs[CTX_RING_TAIL] = ring->tail;
1587         regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1588
1589         /* RPCS */
1590         if (engine->class == RENDER_CLASS) {
1591                 regs[CTX_R_PWR_CLK_STATE] =
1592                         intel_sseu_make_rpcs(engine->gt, &ce->sseu);
1593
1594                 i915_oa_init_reg_state(ce, engine);
1595         }
1596
1597         if (ce->wa_bb_page) {
1598                 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
1599
1600                 fn = gen12_emit_indirect_ctx_xcs;
1601                 if (ce->engine->class == RENDER_CLASS)
1602                         fn = gen12_emit_indirect_ctx_rcs;
1603
1604                 /* Mutually exclusive wrt to global indirect bb */
1605                 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
1606                 setup_indirect_ctx_bb(ce, engine, fn);
1607                 setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
1608         }
1609
1610         return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
1611 }
1612
1613 void lrc_update_offsets(struct intel_context *ce,
1614                         struct intel_engine_cs *engine)
1615 {
1616         set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1617 }
1618
1619 void lrc_check_regs(const struct intel_context *ce,
1620                     const struct intel_engine_cs *engine,
1621                     const char *when)
1622 {
1623         const struct intel_ring *ring = ce->ring;
1624         u32 *regs = ce->lrc_reg_state;
1625         bool valid = true;
1626         int x;
1627
1628         if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1629                 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1630                        engine->name,
1631                        regs[CTX_RING_START],
1632                        i915_ggtt_offset(ring->vma));
1633                 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1634                 valid = false;
1635         }
1636
1637         if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1638             (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1639                 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1640                        engine->name,
1641                        regs[CTX_RING_CTL],
1642                        (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1643                 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1644                 valid = false;
1645         }
1646
1647         x = lrc_ring_mi_mode(engine);
1648         if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1649                 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1650                        engine->name, regs[x + 1]);
1651                 regs[x + 1] &= ~STOP_RING;
1652                 regs[x + 1] |= STOP_RING << 16;
1653                 valid = false;
1654         }
1655
1656         WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1657 }
1658
1659 /*
1660  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1661  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1662  * but there is a slight complication as this is applied in WA batch where the
1663  * values are only initialized once so we cannot take register value at the
1664  * beginning and reuse it further; hence we save its value to memory, upload a
1665  * constant value with bit21 set and then we restore it back with the saved value.
1666  * To simplify the WA, a constant value is formed by using the default value
1667  * of this register. This shouldn't be a problem because we are only modifying
1668  * it for a short period and this batch in non-premptible. We can ofcourse
1669  * use additional instructions that read the actual value of the register
1670  * at that time and set our bit of interest but it makes the WA complicated.
1671  *
1672  * This WA is also required for Gen9 so extracting as a function avoids
1673  * code duplication.
1674  */
1675 static u32 *
1676 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1677 {
1678         /* NB no one else is allowed to scribble over scratch + 256! */
1679         *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1680         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1681         *batch++ = intel_gt_scratch_offset(engine->gt,
1682                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1683         *batch++ = 0;
1684
1685         *batch++ = MI_LOAD_REGISTER_IMM(1);
1686         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1687         *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1688
1689         batch = gen8_emit_pipe_control(batch,
1690                                        PIPE_CONTROL_CS_STALL |
1691                                        PIPE_CONTROL_DC_FLUSH_ENABLE,
1692                                        0);
1693
1694         *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1695         *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1696         *batch++ = intel_gt_scratch_offset(engine->gt,
1697                                            INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1698         *batch++ = 0;
1699
1700         return batch;
1701 }
1702
1703 /*
1704  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1705  * initialized at the beginning and shared across all contexts but this field
1706  * helps us to have multiple batches at different offsets and select them based
1707  * on a criteria. At the moment this batch always start at the beginning of the page
1708  * and at this point we don't have multiple wa_ctx batch buffers.
1709  *
1710  * The number of WA applied are not known at the beginning; we use this field
1711  * to return the no of DWORDS written.
1712  *
1713  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1714  * so it adds NOOPs as padding to make it cacheline aligned.
1715  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1716  * makes a complete batch buffer.
1717  */
1718 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1719 {
1720         /* WaDisableCtxRestoreArbitration:bdw,chv */
1721         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1722
1723         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1724         if (IS_BROADWELL(engine->i915))
1725                 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1726
1727         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1728         /* Actual scratch location is at 128 bytes offset */
1729         batch = gen8_emit_pipe_control(batch,
1730                                        PIPE_CONTROL_FLUSH_L3 |
1731                                        PIPE_CONTROL_STORE_DATA_INDEX |
1732                                        PIPE_CONTROL_CS_STALL |
1733                                        PIPE_CONTROL_QW_WRITE,
1734                                        LRC_PPHWSP_SCRATCH_ADDR);
1735
1736         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1737
1738         /* Pad to end of cacheline */
1739         while ((unsigned long)batch % CACHELINE_BYTES)
1740                 *batch++ = MI_NOOP;
1741
1742         /*
1743          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1744          * execution depends on the length specified in terms of cache lines
1745          * in the register CTX_RCS_INDIRECT_CTX
1746          */
1747
1748         return batch;
1749 }
1750
1751 struct lri {
1752         i915_reg_t reg;
1753         u32 value;
1754 };
1755
1756 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1757 {
1758         GEM_BUG_ON(!count || count > 63);
1759
1760         *batch++ = MI_LOAD_REGISTER_IMM(count);
1761         do {
1762                 *batch++ = i915_mmio_reg_offset(lri->reg);
1763                 *batch++ = lri->value;
1764         } while (lri++, --count);
1765         *batch++ = MI_NOOP;
1766
1767         return batch;
1768 }
1769
1770 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1771 {
1772         static const struct lri lri[] = {
1773                 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
1774                 {
1775                         COMMON_SLICE_CHICKEN2,
1776                         __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
1777                                        0),
1778                 },
1779
1780                 /* BSpec: 11391 */
1781                 {
1782                         FF_SLICE_CHICKEN,
1783                         __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
1784                                        FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
1785                 },
1786
1787                 /* BSpec: 11299 */
1788                 {
1789                         _3D_CHICKEN3,
1790                         __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
1791                                        _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
1792                 }
1793         };
1794
1795         *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1796
1797         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
1798         batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1799
1800         /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
1801         batch = gen8_emit_pipe_control(batch,
1802                                        PIPE_CONTROL_FLUSH_L3 |
1803                                        PIPE_CONTROL_STORE_DATA_INDEX |
1804                                        PIPE_CONTROL_CS_STALL |
1805                                        PIPE_CONTROL_QW_WRITE,
1806                                        LRC_PPHWSP_SCRATCH_ADDR);
1807
1808         batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
1809
1810         /* WaMediaPoolStateCmdInWABB:bxt,glk */
1811         if (HAS_POOLED_EU(engine->i915)) {
1812                 /*
1813                  * EU pool configuration is setup along with golden context
1814                  * during context initialization. This value depends on
1815                  * device type (2x6 or 3x6) and needs to be updated based
1816                  * on which subslice is disabled especially for 2x6
1817                  * devices, however it is safe to load default
1818                  * configuration of 3x6 device instead of masking off
1819                  * corresponding bits because HW ignores bits of a disabled
1820                  * subslice and drops down to appropriate config. Please
1821                  * see render_state_setup() in i915_gem_render_state.c for
1822                  * possible configurations, to avoid duplication they are
1823                  * not shown here again.
1824                  */
1825                 *batch++ = GEN9_MEDIA_POOL_STATE;
1826                 *batch++ = GEN9_MEDIA_POOL_ENABLE;
1827                 *batch++ = 0x00777000;
1828                 *batch++ = 0;
1829                 *batch++ = 0;
1830                 *batch++ = 0;
1831         }
1832
1833         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1834
1835         /* Pad to end of cacheline */
1836         while ((unsigned long)batch % CACHELINE_BYTES)
1837                 *batch++ = MI_NOOP;
1838
1839         return batch;
1840 }
1841
1842 #define CTX_WA_BB_SIZE (PAGE_SIZE)
1843
1844 static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1845 {
1846         struct drm_i915_gem_object *obj;
1847         struct i915_vma *vma;
1848         int err;
1849
1850         obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1851         if (IS_ERR(obj))
1852                 return PTR_ERR(obj);
1853
1854         vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1855         if (IS_ERR(vma)) {
1856                 err = PTR_ERR(vma);
1857                 goto err;
1858         }
1859
1860         engine->wa_ctx.vma = vma;
1861         return 0;
1862
1863 err:
1864         i915_gem_object_put(obj);
1865         return err;
1866 }
1867
1868 void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1869 {
1870         i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1871 }
1872
1873 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1874
1875 void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1876 {
1877         struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1878         struct i915_wa_ctx_bb *wa_bb[] = {
1879                 &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
1880         };
1881         wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
1882         struct i915_gem_ww_ctx ww;
1883         void *batch, *batch_ptr;
1884         unsigned int i;
1885         int err;
1886
1887         if (GRAPHICS_VER(engine->i915) >= 11 ||
1888             !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
1889                 return;
1890
1891         if (GRAPHICS_VER(engine->i915) == 9) {
1892                 wa_bb_fn[0] = gen9_init_indirectctx_bb;
1893                 wa_bb_fn[1] = NULL;
1894         } else if (GRAPHICS_VER(engine->i915) == 8) {
1895                 wa_bb_fn[0] = gen8_init_indirectctx_bb;
1896                 wa_bb_fn[1] = NULL;
1897         }
1898
1899         err = lrc_create_wa_ctx(engine);
1900         if (err) {
1901                 /*
1902                  * We continue even if we fail to initialize WA batch
1903                  * because we only expect rare glitches but nothing
1904                  * critical to prevent us from using GPU
1905                  */
1906                 drm_err(&engine->i915->drm,
1907                         "Ignoring context switch w/a allocation error:%d\n",
1908                         err);
1909                 return;
1910         }
1911
1912         if (!engine->wa_ctx.vma)
1913                 return;
1914
1915         i915_gem_ww_ctx_init(&ww, true);
1916 retry:
1917         err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
1918         if (!err)
1919                 err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
1920         if (err)
1921                 goto err;
1922
1923         batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
1924         if (IS_ERR(batch)) {
1925                 err = PTR_ERR(batch);
1926                 goto err_unpin;
1927         }
1928
1929         /*
1930          * Emit the two workaround batch buffers, recording the offset from the
1931          * start of the workaround batch buffer object for each and their
1932          * respective sizes.
1933          */
1934         batch_ptr = batch;
1935         for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1936                 wa_bb[i]->offset = batch_ptr - batch;
1937                 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1938                                                   CACHELINE_BYTES))) {
1939                         err = -EINVAL;
1940                         break;
1941                 }
1942                 if (wa_bb_fn[i])
1943                         batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1944                 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1945         }
1946         GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1947
1948         __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1949         __i915_gem_object_release_map(wa_ctx->vma->obj);
1950
1951         /* Verify that we can handle failure to setup the wa_ctx */
1952         if (!err)
1953                 err = i915_inject_probe_error(engine->i915, -ENODEV);
1954
1955 err_unpin:
1956         if (err)
1957                 i915_vma_unpin(wa_ctx->vma);
1958 err:
1959         if (err == -EDEADLK) {
1960                 err = i915_gem_ww_ctx_backoff(&ww);
1961                 if (!err)
1962                         goto retry;
1963         }
1964         i915_gem_ww_ctx_fini(&ww);
1965
1966         if (err) {
1967                 i915_vma_put(engine->wa_ctx.vma);
1968
1969                 /* Clear all flags to prevent further use */
1970                 memset(wa_ctx, 0, sizeof(*wa_ctx));
1971         }
1972 }
1973
1974 static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
1975 {
1976 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1977         stats->runtime.num_underflow++;
1978         stats->runtime.max_underflow =
1979                 max_t(u32, stats->runtime.max_underflow, -dt);
1980 #endif
1981 }
1982
1983 static u32 lrc_get_runtime(const struct intel_context *ce)
1984 {
1985         /*
1986          * We can use either ppHWSP[16] which is recorded before the context
1987          * switch (and so excludes the cost of context switches) or use the
1988          * value from the context image itself, which is saved/restored earlier
1989          * and so includes the cost of the save.
1990          */
1991         return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
1992 }
1993
1994 void lrc_update_runtime(struct intel_context *ce)
1995 {
1996         struct intel_context_stats *stats = &ce->stats;
1997         u32 old;
1998         s32 dt;
1999
2000         old = stats->runtime.last;
2001         stats->runtime.last = lrc_get_runtime(ce);
2002         dt = stats->runtime.last - old;
2003         if (!dt)
2004                 return;
2005
2006         if (unlikely(dt < 0)) {
2007                 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
2008                          old, stats->runtime.last, dt);
2009                 st_runtime_underflow(stats, dt);
2010                 return;
2011         }
2012
2013         ewma_runtime_add(&stats->runtime.avg, dt);
2014         stats->runtime.total += dt;
2015 }
2016
2017 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2018 #include "selftest_lrc.c"
2019 #endif