drivers/gpu/drm/i915/i915_gem_gtt.c

   1 /*
   2  * Copyright © 2010 Daniel Vetter
   3  * Copyright © 2011-2014 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  */
  25
  26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
  27
  28 #include <linux/fault-inject.h>
  29 #include <linux/log2.h>
  30 #include <linux/random.h>
  31 #include <linux/seq_file.h>
  32 #include <linux/stop_machine.h>
  33
  34 #include <asm/set_memory.h>
  35
  36 #include <drm/drmP.h>
  37 #include <drm/i915_drm.h>
  38
  39 #include "i915_drv.h"
  40 #include "i915_vgpu.h"
  41 #include "i915_trace.h"
  42 #include "intel_drv.h"
  43 #include "intel_frontbuffer.h"
  44
  45 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
  46
  47 /**
  48  * DOC: Global GTT views
  49  *
  50  * Background and previous state
  51  *
  52  * Historically objects could exist (be bound) in global GTT space only as
  53  * singular instances with a view representing all of the object's backing pages
  54  * in a linear fashion. This view will be called a normal view.
  55  *
  56  * To support multiple views of the same object, where the number of mapped
  57  * pages is not equal to the backing store, or where the layout of the pages
  58  * is not linear, the concept of a GGTT view was added.
  59  *
  60  * One example of an alternative view is a stereo display driven by a single
  61  * image. In this case we would have a framebuffer looking like this
  62  * (2x2 pages):
  63  *
  64  *    12
  65  *    34
  66  *
  67  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
  68  * rendering. In contrast, fed to the display engine would be an alternative
  69  * view which could look something like this:
  70  *
  71  *   1212
  72  *   3434
  73  *
  74  * In this example both the size and layout of pages in the alternative view is
  75  * different from the normal view.
  76  *
  77  * Implementation and usage
  78  *
  79  * GGTT views are implemented using VMAs and are distinguished via enum
  80  * i915_ggtt_view_type and struct i915_ggtt_view.
  81  *
  82  * A new flavour of core GEM functions which work with GGTT bound objects was
  83  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
  84  * renaming in large amounts of code. They take the struct i915_ggtt_view
  85  * parameter encapsulating all metadata required to implement a view.
  86  *
  87  * As a helper for callers which are only interested in the normal view,
  88  * globally const i915_ggtt_view_normal singleton instance exists. All old core
  89  * GEM API functions, the ones not taking the view parameter, are operating on,
  90  * or with the normal GGTT view.
  91  *
  92  * Code wanting to add or use a new GGTT view needs to:
  93  *
  94  * 1. Add a new enum with a suitable name.
  95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
  96  * 3. Add support to i915_get_vma_pages().
  97  *
  98  * New views are required to build a scatter-gather table from within the
  99  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 100  * exists for the lifetime of an VMA.
 101  *
 102  * Core API is designed to have copy semantics which means that passed in
 103  * struct i915_ggtt_view does not need to be persistent (left around after
 104  * calling the core API functions).
 105  *
 106  */
 107
 108 static int
 109 i915_get_ggtt_vma_pages(struct i915_vma *vma);
 110
 111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
 112 {
 113         /*
 114          * Note that as an uncached mmio write, this will flush the
 115          * WCB of the writes into the GGTT before it triggers the invalidate.
 116          */
 117         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 118 }
 119
 120 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
 121 {
 122         gen6_ggtt_invalidate(dev_priv);
 123         I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 124 }
 125
 126 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
 127 {
 128         intel_gtt_chipset_flush();
 129 }
 130
 131 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
 132 {
 133         i915->ggtt.invalidate(i915);
 134 }
 135
 136 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 137                                 int enable_ppgtt)
 138 {
 139         bool has_full_ppgtt;
 140         bool has_full_48bit_ppgtt;
 141
 142         if (!dev_priv->info.has_aliasing_ppgtt)
 143                 return 0;
 144
 145         has_full_ppgtt = dev_priv->info.has_full_ppgtt;
 146         has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
 147
 148         if (intel_vgpu_active(dev_priv)) {
 149                 /* GVT-g has no support for 32bit ppgtt */
 150                 has_full_ppgtt = false;
 151                 has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
 152         }
 153
 154         /*
 155          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
 156          * execlists, the sole mechanism available to submit work.
 157          */
 158         if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
 159                 return 0;
 160
 161         if (enable_ppgtt == 1)
 162                 return 1;
 163
 164         if (enable_ppgtt == 2 && has_full_ppgtt)
 165                 return 2;
 166
 167         if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
 168                 return 3;
 169
 170         /* Disable ppgtt on SNB if VT-d is on. */
 171         if (IS_GEN6(dev_priv) && intel_vtd_active()) {
 172                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
 173                 return 0;
 174         }
 175
 176         /* Early VLV doesn't have this */
 177         if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
 178                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
 179                 return 0;
 180         }
 181
 182         if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 183                 if (has_full_48bit_ppgtt)
 184                         return 3;
 185
 186                 if (has_full_ppgtt)
 187                         return 2;
 188         }
 189
 190         return 1;
 191 }
 192
 193 static int ppgtt_bind_vma(struct i915_vma *vma,
 194                           enum i915_cache_level cache_level,
 195                           u32 unused)
 196 {
 197         u32 pte_flags;
 198         int ret;
 199
 200         if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
 201                 ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
 202                                                  vma->size);
 203                 if (ret)
 204                         return ret;
 205         }
 206
 207         /* Currently applicable only to VLV */
 208         pte_flags = 0;
 209         if (vma->obj->gt_ro)
 210                 pte_flags |= PTE_READ_ONLY;
 211
 212         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 213
 214         return 0;
 215 }
 216
 217 static void ppgtt_unbind_vma(struct i915_vma *vma)
 218 {
 219         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 220 }
 221
 222 static int ppgtt_set_pages(struct i915_vma *vma)
 223 {
 224         GEM_BUG_ON(vma->pages);
 225
 226         vma->pages = vma->obj->mm.pages;
 227
 228         vma->page_sizes = vma->obj->mm.page_sizes;
 229
 230         return 0;
 231 }
 232
 233 static void clear_pages(struct i915_vma *vma)
 234 {
 235         GEM_BUG_ON(!vma->pages);
 236
 237         if (vma->pages != vma->obj->mm.pages) {
 238                 sg_free_table(vma->pages);
 239                 kfree(vma->pages);
 240         }
 241         vma->pages = NULL;
 242
 243         memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
 244 }
 245
 246 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 247                                   enum i915_cache_level level)
 248 {
 249         gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
 250         pte |= addr;
 251
 252         switch (level) {
 253         case I915_CACHE_NONE:
 254                 pte |= PPAT_UNCACHED;
 255                 break;
 256         case I915_CACHE_WT:
 257                 pte |= PPAT_DISPLAY_ELLC;
 258                 break;
 259         default:
 260                 pte |= PPAT_CACHED;
 261                 break;
 262         }
 263
 264         return pte;
 265 }
 266
 267 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 268                                   const enum i915_cache_level level)
 269 {
 270         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 271         pde |= addr;
 272         if (level != I915_CACHE_NONE)
 273                 pde |= PPAT_CACHED_PDE;
 274         else
 275                 pde |= PPAT_UNCACHED;
 276         return pde;
 277 }
 278
 279 #define gen8_pdpe_encode gen8_pde_encode
 280 #define gen8_pml4e_encode gen8_pde_encode
 281
 282 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 283                                  enum i915_cache_level level,
 284                                  u32 unused)
 285 {
 286         gen6_pte_t pte = GEN6_PTE_VALID;
 287         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 288
 289         switch (level) {
 290         case I915_CACHE_L3_LLC:
 291         case I915_CACHE_LLC:
 292                 pte |= GEN6_PTE_CACHE_LLC;
 293                 break;
 294         case I915_CACHE_NONE:
 295                 pte |= GEN6_PTE_UNCACHED;
 296                 break;
 297         default:
 298                 MISSING_CASE(level);
 299         }
 300
 301         return pte;
 302 }
 303
 304 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
 305                                  enum i915_cache_level level,
 306                                  u32 unused)
 307 {
 308         gen6_pte_t pte = GEN6_PTE_VALID;
 309         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 310
 311         switch (level) {
 312         case I915_CACHE_L3_LLC:
 313                 pte |= GEN7_PTE_CACHE_L3_LLC;
 314                 break;
 315         case I915_CACHE_LLC:
 316                 pte |= GEN6_PTE_CACHE_LLC;
 317                 break;
 318         case I915_CACHE_NONE:
 319                 pte |= GEN6_PTE_UNCACHED;
 320                 break;
 321         default:
 322                 MISSING_CASE(level);
 323         }
 324
 325         return pte;
 326 }
 327
 328 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
 329                                  enum i915_cache_level level,
 330                                  u32 flags)
 331 {
 332         gen6_pte_t pte = GEN6_PTE_VALID;
 333         pte |= GEN6_PTE_ADDR_ENCODE(addr);
 334
 335         if (!(flags & PTE_READ_ONLY))
 336                 pte |= BYT_PTE_WRITEABLE;
 337
 338         if (level != I915_CACHE_NONE)
 339                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 340
 341         return pte;
 342 }
 343
 344 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
 345                                  enum i915_cache_level level,
 346                                  u32 unused)
 347 {
 348         gen6_pte_t pte = GEN6_PTE_VALID;
 349         pte |= HSW_PTE_ADDR_ENCODE(addr);
 350
 351         if (level != I915_CACHE_NONE)
 352                 pte |= HSW_WB_LLC_AGE3;
 353
 354         return pte;
 355 }
 356
 357 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 358                                   enum i915_cache_level level,
 359                                   u32 unused)
 360 {
 361         gen6_pte_t pte = GEN6_PTE_VALID;
 362         pte |= HSW_PTE_ADDR_ENCODE(addr);
 363
 364         switch (level) {
 365         case I915_CACHE_NONE:
 366                 break;
 367         case I915_CACHE_WT:
 368                 pte |= HSW_WT_ELLC_LLC_AGE3;
 369                 break;
 370         default:
 371                 pte |= HSW_WB_ELLC_LLC_AGE3;
 372                 break;
 373         }
 374
 375         return pte;
 376 }
 377
 378 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 379 {
 380         struct pagevec *pvec = &vm->free_pages;
 381         struct pagevec stash;
 382
 383         if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
 384                 i915_gem_shrink_all(vm->i915);
 385
 386         if (likely(pvec->nr))
 387                 return pvec->pages[--pvec->nr];
 388
 389         if (!vm->pt_kmap_wc)
 390                 return alloc_page(gfp);
 391
 392         /* A placeholder for a specific mutex to guard the WC stash */
 393         lockdep_assert_held(&vm->i915->drm.struct_mutex);
 394
 395         /* Look in our global stash of WC pages... */
 396         pvec = &vm->i915->mm.wc_stash;
 397         if (likely(pvec->nr))
 398                 return pvec->pages[--pvec->nr];
 399
 400         /*
 401          * Otherwise batch allocate pages to amoritize cost of set_pages_wc.
 402          *
 403          * We have to be careful as page allocation may trigger the shrinker
 404          * (via direct reclaim) which will fill up the WC stash underneath us.
 405          * So we add our WB pages into a temporary pvec on the stack and merge
 406          * them into the WC stash after all the allocations are complete.
 407          */
 408         pagevec_init(&stash);
 409         do {
 410                 struct page *page;
 411
 412                 page = alloc_page(gfp);
 413                 if (unlikely(!page))
 414                         break;
 415
 416                 stash.pages[stash.nr++] = page;
 417         } while (stash.nr < pagevec_space(pvec));
 418
 419         if (stash.nr) {
 420                 int nr = min_t(int, stash.nr, pagevec_space(pvec));
 421                 struct page **pages = stash.pages + stash.nr - nr;
 422
 423                 if (nr && !set_pages_array_wc(pages, nr)) {
 424                         memcpy(pvec->pages + pvec->nr,
 425                                pages, sizeof(pages[0]) * nr);
 426                         pvec->nr += nr;
 427                         stash.nr -= nr;
 428                 }
 429
 430                 pagevec_release(&stash);
 431         }
 432
 433         return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL;
 434 }
 435
 436 static void vm_free_pages_release(struct i915_address_space *vm,
 437                                   bool immediate)
 438 {
 439         struct pagevec *pvec = &vm->free_pages;
 440
 441         GEM_BUG_ON(!pagevec_count(pvec));
 442
 443         if (vm->pt_kmap_wc) {
 444                 struct pagevec *stash = &vm->i915->mm.wc_stash;
 445
 446                 /* When we use WC, first fill up the global stash and then
 447                  * only if full immediately free the overflow.
 448                  */
 449
 450                 lockdep_assert_held(&vm->i915->drm.struct_mutex);
 451                 if (pagevec_space(stash)) {
 452                         do {
 453                                 stash->pages[stash->nr++] =
 454                                         pvec->pages[--pvec->nr];
 455                                 if (!pvec->nr)
 456                                         return;
 457                         } while (pagevec_space(stash));
 458
 459                         /* As we have made some room in the VM's free_pages,
 460                          * we can wait for it to fill again. Unless we are
 461                          * inside i915_address_space_fini() and must
 462                          * immediately release the pages!
 463                          */
 464                         if (!immediate)
 465                                 return;
 466                 }
 467
 468                 set_pages_array_wb(pvec->pages, pvec->nr);
 469         }
 470
 471         __pagevec_release(pvec);
 472 }
 473
 474 static void vm_free_page(struct i915_address_space *vm, struct page *page)
 475 {
 476         /*
 477          * On !llc, we need to change the pages back to WB. We only do so
 478          * in bulk, so we rarely need to change the page attributes here,
 479          * but doing so requires a stop_machine() from deep inside arch/x86/mm.
 480          * To make detection of the possible sleep more likely, use an
 481          * unconditional might_sleep() for everybody.
 482          */
 483         might_sleep();
 484         if (!pagevec_add(&vm->free_pages, page))
 485                 vm_free_pages_release(vm, false);
 486 }
 487
 488 static int __setup_page_dma(struct i915_address_space *vm,
 489                             struct i915_page_dma *p,
 490                             gfp_t gfp)
 491 {
 492         p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
 493         if (unlikely(!p->page))
 494                 return -ENOMEM;
 495
 496         p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
 497                                 PCI_DMA_BIDIRECTIONAL);
 498         if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
 499                 vm_free_page(vm, p->page);
 500                 return -ENOMEM;
 501         }
 502
 503         return 0;
 504 }
 505
 506 static int setup_page_dma(struct i915_address_space *vm,
 507                           struct i915_page_dma *p)
 508 {
 509         return __setup_page_dma(vm, p, I915_GFP_DMA);
 510 }
 511
 512 static void cleanup_page_dma(struct i915_address_space *vm,
 513                              struct i915_page_dma *p)
 514 {
 515         dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 516         vm_free_page(vm, p->page);
 517 }
 518
 519 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 520
 521 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
 522 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
 523 #define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
 524 #define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
 525
 526 static void fill_page_dma(struct i915_address_space *vm,
 527                           struct i915_page_dma *p,
 528                           const u64 val)
 529 {
 530         u64 * const vaddr = kmap_atomic(p->page);
 531
 532         memset64(vaddr, val, PAGE_SIZE / sizeof(val));
 533
 534         kunmap_atomic(vaddr);
 535 }
 536
 537 static void fill_page_dma_32(struct i915_address_space *vm,
 538                              struct i915_page_dma *p,
 539                              const u32 v)
 540 {
 541         fill_page_dma(vm, p, (u64)v << 32 | v);
 542 }
 543
 544 static int
 545 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 546 {
 547         unsigned long size;
 548
 549         /*
 550          * In order to utilize 64K pages for an object with a size < 2M, we will
 551          * need to support a 64K scratch page, given that every 16th entry for a
 552          * page-table operating in 64K mode must point to a properly aligned 64K
 553          * region, including any PTEs which happen to point to scratch.
 554          *
 555          * This is only relevant for the 48b PPGTT where we support
 556          * huge-gtt-pages, see also i915_vma_insert().
 557          *
 558          * TODO: we should really consider write-protecting the scratch-page and
 559          * sharing between ppgtt
 560          */
 561         size = I915_GTT_PAGE_SIZE_4K;
 562         if (i915_vm_is_48bit(vm) &&
 563             HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
 564                 size = I915_GTT_PAGE_SIZE_64K;
 565                 gfp |= __GFP_NOWARN;
 566         }
 567         gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
 568
 569         do {
 570                 int order = get_order(size);
 571                 struct page *page;
 572                 dma_addr_t addr;
 573
 574                 page = alloc_pages(gfp, order);
 575                 if (unlikely(!page))
 576                         goto skip;
 577
 578                 addr = dma_map_page(vm->dma, page, 0, size,
 579                                     PCI_DMA_BIDIRECTIONAL);
 580                 if (unlikely(dma_mapping_error(vm->dma, addr)))
 581                         goto free_page;
 582
 583                 if (unlikely(!IS_ALIGNED(addr, size)))
 584                         goto unmap_page;
 585
 586                 vm->scratch_page.page = page;
 587                 vm->scratch_page.daddr = addr;
 588                 vm->scratch_page.order = order;
 589                 return 0;
 590
 591 unmap_page:
 592                 dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
 593 free_page:
 594                 __free_pages(page, order);
 595 skip:
 596                 if (size == I915_GTT_PAGE_SIZE_4K)
 597                         return -ENOMEM;
 598
 599                 size = I915_GTT_PAGE_SIZE_4K;
 600                 gfp &= ~__GFP_NOWARN;
 601         } while (1);
 602 }
 603
 604 static void cleanup_scratch_page(struct i915_address_space *vm)
 605 {
 606         struct i915_page_dma *p = &vm->scratch_page;
 607
 608         dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
 609                        PCI_DMA_BIDIRECTIONAL);
 610         __free_pages(p->page, p->order);
 611 }
 612
 613 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 614 {
 615         struct i915_page_table *pt;
 616
 617         pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
 618         if (unlikely(!pt))
 619                 return ERR_PTR(-ENOMEM);
 620
 621         if (unlikely(setup_px(vm, pt))) {
 622                 kfree(pt);
 623                 return ERR_PTR(-ENOMEM);
 624         }
 625
 626         pt->used_ptes = 0;
 627         return pt;
 628 }
 629
 630 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 631 {
 632         cleanup_px(vm, pt);
 633         kfree(pt);
 634 }
 635
 636 static void gen8_initialize_pt(struct i915_address_space *vm,
 637                                struct i915_page_table *pt)
 638 {
 639         fill_px(vm, pt,
 640                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
 641 }
 642
 643 static void gen6_initialize_pt(struct i915_address_space *vm,
 644                                struct i915_page_table *pt)
 645 {
 646         fill32_px(vm, pt,
 647                   vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
 648 }
 649
 650 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 651 {
 652         struct i915_page_directory *pd;
 653
 654         pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
 655         if (unlikely(!pd))
 656                 return ERR_PTR(-ENOMEM);
 657
 658         if (unlikely(setup_px(vm, pd))) {
 659                 kfree(pd);
 660                 return ERR_PTR(-ENOMEM);
 661         }
 662
 663         pd->used_pdes = 0;
 664         return pd;
 665 }
 666
 667 static void free_pd(struct i915_address_space *vm,
 668                     struct i915_page_directory *pd)
 669 {
 670         cleanup_px(vm, pd);
 671         kfree(pd);
 672 }
 673
 674 static void gen8_initialize_pd(struct i915_address_space *vm,
 675                                struct i915_page_directory *pd)
 676 {
 677         fill_px(vm, pd,
 678                 gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
 679         memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
 680 }
 681
 682 static int __pdp_init(struct i915_address_space *vm,
 683                       struct i915_page_directory_pointer *pdp)
 684 {
 685         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
 686
 687         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
 688                                             GFP_KERNEL | __GFP_NOWARN);
 689         if (unlikely(!pdp->page_directory))
 690                 return -ENOMEM;
 691
 692         memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
 693
 694         return 0;
 695 }
 696
 697 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 698 {
 699         kfree(pdp->page_directory);
 700         pdp->page_directory = NULL;
 701 }
 702
 703 static inline bool use_4lvl(const struct i915_address_space *vm)
 704 {
 705         return i915_vm_is_48bit(vm);
 706 }
 707
 708 static struct i915_page_directory_pointer *
 709 alloc_pdp(struct i915_address_space *vm)
 710 {
 711         struct i915_page_directory_pointer *pdp;
 712         int ret = -ENOMEM;
 713
 714         GEM_BUG_ON(!use_4lvl(vm));
 715
 716         pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
 717         if (!pdp)
 718                 return ERR_PTR(-ENOMEM);
 719
 720         ret = __pdp_init(vm, pdp);
 721         if (ret)
 722                 goto fail_bitmap;
 723
 724         ret = setup_px(vm, pdp);
 725         if (ret)
 726                 goto fail_page_m;
 727
 728         return pdp;
 729
 730 fail_page_m:
 731         __pdp_fini(pdp);
 732 fail_bitmap:
 733         kfree(pdp);
 734
 735         return ERR_PTR(ret);
 736 }
 737
 738 static void free_pdp(struct i915_address_space *vm,
 739                      struct i915_page_directory_pointer *pdp)
 740 {
 741         __pdp_fini(pdp);
 742
 743         if (!use_4lvl(vm))
 744                 return;
 745
 746         cleanup_px(vm, pdp);
 747         kfree(pdp);
 748 }
 749
 750 static void gen8_initialize_pdp(struct i915_address_space *vm,
 751                                 struct i915_page_directory_pointer *pdp)
 752 {
 753         gen8_ppgtt_pdpe_t scratch_pdpe;
 754
 755         scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 756
 757         fill_px(vm, pdp, scratch_pdpe);
 758 }
 759
 760 static void gen8_initialize_pml4(struct i915_address_space *vm,
 761                                  struct i915_pml4 *pml4)
 762 {
 763         fill_px(vm, pml4,
 764                 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
 765         memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
 766 }
 767
 768 /* Broadwell Page Directory Pointer Descriptors */
 769 static int gen8_write_pdp(struct i915_request *rq,
 770                           unsigned entry,
 771                           dma_addr_t addr)
 772 {
 773         struct intel_engine_cs *engine = rq->engine;
 774         u32 *cs;
 775
 776         BUG_ON(entry >= 4);
 777
 778         cs = intel_ring_begin(rq, 6);
 779         if (IS_ERR(cs))
 780                 return PTR_ERR(cs);
 781
 782         *cs++ = MI_LOAD_REGISTER_IMM(1);
 783         *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
 784         *cs++ = upper_32_bits(addr);
 785         *cs++ = MI_LOAD_REGISTER_IMM(1);
 786         *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
 787         *cs++ = lower_32_bits(addr);
 788         intel_ring_advance(rq, cs);
 789
 790         return 0;
 791 }
 792
 793 static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
 794                                struct i915_request *rq)
 795 {
 796         int i, ret;
 797
 798         for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
 799                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 800
 801                 ret = gen8_write_pdp(rq, i, pd_daddr);
 802                 if (ret)
 803                         return ret;
 804         }
 805
 806         return 0;
 807 }
 808
 809 static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
 810                                struct i915_request *rq)
 811 {
 812         return gen8_write_pdp(rq, 0, px_dma(&ppgtt->pml4));
 813 }
 814
 815 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 816  * the page table structures, we mark them dirty so that
 817  * context switching/execlist queuing code takes extra steps
 818  * to ensure that tlbs are flushed.
 819  */
 820 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
 821 {
 822         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
 823 }
 824
 825 /* Removes entries from a single page table, releasing it if it's empty.
 826  * Caller can use the return value to update higher-level entries.
 827  */
 828 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 829                                 struct i915_page_table *pt,
 830                                 u64 start, u64 length)
 831 {
 832         unsigned int num_entries = gen8_pte_count(start, length);
 833         unsigned int pte = gen8_pte_index(start);
 834         unsigned int pte_end = pte + num_entries;
 835         const gen8_pte_t scratch_pte =
 836                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
 837         gen8_pte_t *vaddr;
 838
 839         GEM_BUG_ON(num_entries > pt->used_ptes);
 840
 841         pt->used_ptes -= num_entries;
 842         if (!pt->used_ptes)
 843                 return true;
 844
 845         vaddr = kmap_atomic_px(pt);
 846         while (pte < pte_end)
 847                 vaddr[pte++] = scratch_pte;
 848         kunmap_atomic(vaddr);
 849
 850         return false;
 851 }
 852
 853 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 854                                struct i915_page_directory *pd,
 855                                struct i915_page_table *pt,
 856                                unsigned int pde)
 857 {
 858         gen8_pde_t *vaddr;
 859
 860         pd->page_table[pde] = pt;
 861
 862         vaddr = kmap_atomic_px(pd);
 863         vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 864         kunmap_atomic(vaddr);
 865 }
 866
 867 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 868                                 struct i915_page_directory *pd,
 869                                 u64 start, u64 length)
 870 {
 871         struct i915_page_table *pt;
 872         u32 pde;
 873
 874         gen8_for_each_pde(pt, pd, start, length, pde) {
 875                 GEM_BUG_ON(pt == vm->scratch_pt);
 876
 877                 if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
 878                         continue;
 879
 880                 gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
 881                 GEM_BUG_ON(!pd->used_pdes);
 882                 pd->used_pdes--;
 883
 884                 free_pt(vm, pt);
 885         }
 886
 887         return !pd->used_pdes;
 888 }
 889
 890 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
 891                                 struct i915_page_directory_pointer *pdp,
 892                                 struct i915_page_directory *pd,
 893                                 unsigned int pdpe)
 894 {
 895         gen8_ppgtt_pdpe_t *vaddr;
 896
 897         pdp->page_directory[pdpe] = pd;
 898         if (!use_4lvl(vm))
 899                 return;
 900
 901         vaddr = kmap_atomic_px(pdp);
 902         vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
 903         kunmap_atomic(vaddr);
 904 }
 905
 906 /* Removes entries from a single page dir pointer, releasing it if it's empty.
 907  * Caller can use the return value to update higher-level entries
 908  */
 909 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 910                                  struct i915_page_directory_pointer *pdp,
 911                                  u64 start, u64 length)
 912 {
 913         struct i915_page_directory *pd;
 914         unsigned int pdpe;
 915
 916         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 917                 GEM_BUG_ON(pd == vm->scratch_pd);
 918
 919                 if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
 920                         continue;
 921
 922                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
 923                 GEM_BUG_ON(!pdp->used_pdpes);
 924                 pdp->used_pdpes--;
 925
 926                 free_pd(vm, pd);
 927         }
 928
 929         return !pdp->used_pdpes;
 930 }
 931
 932 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 933                                   u64 start, u64 length)
 934 {
 935         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
 936 }
 937
 938 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
 939                                  struct i915_page_directory_pointer *pdp,
 940                                  unsigned int pml4e)
 941 {
 942         gen8_ppgtt_pml4e_t *vaddr;
 943
 944         pml4->pdps[pml4e] = pdp;
 945
 946         vaddr = kmap_atomic_px(pml4);
 947         vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 948         kunmap_atomic(vaddr);
 949 }
 950
 951 /* Removes entries from a single pml4.
 952  * This is the top-level structure in 4-level page tables used on gen8+.
 953  * Empty entries are always scratch pml4e.
 954  */
 955 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 956                                   u64 start, u64 length)
 957 {
 958         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 959         struct i915_pml4 *pml4 = &ppgtt->pml4;
 960         struct i915_page_directory_pointer *pdp;
 961         unsigned int pml4e;
 962
 963         GEM_BUG_ON(!use_4lvl(vm));
 964
 965         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 966                 GEM_BUG_ON(pdp == vm->scratch_pdp);
 967
 968                 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
 969                         continue;
 970
 971                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
 972
 973                 free_pdp(vm, pdp);
 974         }
 975 }
 976
 977 static inline struct sgt_dma {
 978         struct scatterlist *sg;
 979         dma_addr_t dma, max;
 980 } sgt_dma(struct i915_vma *vma) {
 981         struct scatterlist *sg = vma->pages->sgl;
 982         dma_addr_t addr = sg_dma_address(sg);
 983         return (struct sgt_dma) { sg, addr, addr + sg->length };
 984 }
 985
 986 struct gen8_insert_pte {
 987         u16 pml4e;
 988         u16 pdpe;
 989         u16 pde;
 990         u16 pte;
 991 };
 992
 993 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
 994 {
 995         return (struct gen8_insert_pte) {
 996                  gen8_pml4e_index(start),
 997                  gen8_pdpe_index(start),
 998                  gen8_pde_index(start),
 999                  gen8_pte_index(start),
1000         };
1001 }
1002
1003 static __always_inline bool
1004 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
1005                               struct i915_page_directory_pointer *pdp,
1006                               struct sgt_dma *iter,
1007                               struct gen8_insert_pte *idx,
1008                               enum i915_cache_level cache_level)
1009 {
1010         struct i915_page_directory *pd;
1011         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1012         gen8_pte_t *vaddr;
1013         bool ret;
1014
1015         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1016         pd = pdp->page_directory[idx->pdpe];
1017         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1018         do {
1019                 vaddr[idx->pte] = pte_encode | iter->dma;
1020
1021                 iter->dma += PAGE_SIZE;
1022                 if (iter->dma >= iter->max) {
1023                         iter->sg = __sg_next(iter->sg);
1024                         if (!iter->sg) {
1025                                 ret = false;
1026                                 break;
1027                         }
1028
1029                         iter->dma = sg_dma_address(iter->sg);
1030                         iter->max = iter->dma + iter->sg->length;
1031                 }
1032
1033                 if (++idx->pte == GEN8_PTES) {
1034                         idx->pte = 0;
1035
1036                         if (++idx->pde == I915_PDES) {
1037                                 idx->pde = 0;
1038
1039                                 /* Limited by sg length for 3lvl */
1040                                 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1041                                         idx->pdpe = 0;
1042                                         ret = true;
1043                                         break;
1044                                 }
1045
1046                                 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1047                                 pd = pdp->page_directory[idx->pdpe];
1048                         }
1049
1050                         kunmap_atomic(vaddr);
1051                         vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1052                 }
1053         } while (1);
1054         kunmap_atomic(vaddr);
1055
1056         return ret;
1057 }
1058
1059 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1060                                    struct i915_vma *vma,
1061                                    enum i915_cache_level cache_level,
1062                                    u32 unused)
1063 {
1064         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1065         struct sgt_dma iter = sgt_dma(vma);
1066         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1067
1068         gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
1069                                       cache_level);
1070
1071         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1072 }
1073
1074 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1075                                            struct i915_page_directory_pointer **pdps,
1076                                            struct sgt_dma *iter,
1077                                            enum i915_cache_level cache_level)
1078 {
1079         const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1080         u64 start = vma->node.start;
1081         dma_addr_t rem = iter->sg->length;
1082
1083         do {
1084                 struct gen8_insert_pte idx = gen8_insert_pte(start);
1085                 struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1086                 struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1087                 unsigned int page_size;
1088                 bool maybe_64K = false;
1089                 gen8_pte_t encode = pte_encode;
1090                 gen8_pte_t *vaddr;
1091                 u16 index, max;
1092
1093                 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1094                     IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1095                     rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1096                         index = idx.pde;
1097                         max = I915_PDES;
1098                         page_size = I915_GTT_PAGE_SIZE_2M;
1099
1100                         encode |= GEN8_PDE_PS_2M;
1101
1102                         vaddr = kmap_atomic_px(pd);
1103                 } else {
1104                         struct i915_page_table *pt = pd->page_table[idx.pde];
1105
1106                         index = idx.pte;
1107                         max = GEN8_PTES;
1108                         page_size = I915_GTT_PAGE_SIZE;
1109
1110                         if (!index &&
1111                             vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1112                             IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1113                             (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1114                              rem >= (max - index) << PAGE_SHIFT))
1115                                 maybe_64K = true;
1116
1117                         vaddr = kmap_atomic_px(pt);
1118                 }
1119
1120                 do {
1121                         GEM_BUG_ON(iter->sg->length < page_size);
1122                         vaddr[index++] = encode | iter->dma;
1123
1124                         start += page_size;
1125                         iter->dma += page_size;
1126                         rem -= page_size;
1127                         if (iter->dma >= iter->max) {
1128                                 iter->sg = __sg_next(iter->sg);
1129                                 if (!iter->sg)
1130                                         break;
1131
1132                                 rem = iter->sg->length;
1133                                 iter->dma = sg_dma_address(iter->sg);
1134                                 iter->max = iter->dma + rem;
1135
1136                                 if (maybe_64K && index < max &&
1137                                     !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1138                                       (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1139                                        rem >= (max - index) << PAGE_SHIFT)))
1140                                         maybe_64K = false;
1141
1142                                 if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1143                                         break;
1144                         }
1145                 } while (rem >= page_size && index < max);
1146
1147                 kunmap_atomic(vaddr);
1148
1149                 /*
1150                  * Is it safe to mark the 2M block as 64K? -- Either we have
1151                  * filled whole page-table with 64K entries, or filled part of
1152                  * it and have reached the end of the sg table and we have
1153                  * enough padding.
1154                  */
1155                 if (maybe_64K &&
1156                     (index == max ||
1157                      (i915_vm_has_scratch_64K(vma->vm) &&
1158                       !iter->sg && IS_ALIGNED(vma->node.start +
1159                                               vma->node.size,
1160                                               I915_GTT_PAGE_SIZE_2M)))) {
1161                         vaddr = kmap_atomic_px(pd);
1162                         vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1163                         kunmap_atomic(vaddr);
1164                         page_size = I915_GTT_PAGE_SIZE_64K;
1165
1166                         /*
1167                          * We write all 4K page entries, even when using 64K
1168                          * pages. In order to verify that the HW isn't cheating
1169                          * by using the 4K PTE instead of the 64K PTE, we want
1170                          * to remove all the surplus entries. If the HW skipped
1171                          * the 64K PTE, it will read/write into the scratch page
1172                          * instead - which we detect as missing results during
1173                          * selftests.
1174                          */
1175                         if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1176                                 u16 i;
1177
1178                                 encode = pte_encode | vma->vm->scratch_page.daddr;
1179                                 vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
1180
1181                                 for (i = 1; i < index; i += 16)
1182                                         memset64(vaddr + i, encode, 15);
1183
1184                                 kunmap_atomic(vaddr);
1185                         }
1186                 }
1187
1188                 vma->page_sizes.gtt |= page_size;
1189         } while (iter->sg);
1190 }
1191
1192 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1193                                    struct i915_vma *vma,
1194                                    enum i915_cache_level cache_level,
1195                                    u32 unused)
1196 {
1197         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1198         struct sgt_dma iter = sgt_dma(vma);
1199         struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
1200
1201         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1202                 gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
1203         } else {
1204                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1205
1206                 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
1207                                                      &iter, &idx, cache_level))
1208                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1209
1210                 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1211         }
1212 }
1213
1214 static void gen8_free_page_tables(struct i915_address_space *vm,
1215                                   struct i915_page_directory *pd)
1216 {
1217         int i;
1218
1219         if (!px_page(pd))
1220                 return;
1221
1222         for (i = 0; i < I915_PDES; i++) {
1223                 if (pd->page_table[i] != vm->scratch_pt)
1224                         free_pt(vm, pd->page_table[i]);
1225         }
1226 }
1227
1228 static int gen8_init_scratch(struct i915_address_space *vm)
1229 {
1230         int ret;
1231
1232         ret = setup_scratch_page(vm, I915_GFP_DMA);
1233         if (ret)
1234                 return ret;
1235
1236         vm->scratch_pt = alloc_pt(vm);
1237         if (IS_ERR(vm->scratch_pt)) {
1238                 ret = PTR_ERR(vm->scratch_pt);
1239                 goto free_scratch_page;
1240         }
1241
1242         vm->scratch_pd = alloc_pd(vm);
1243         if (IS_ERR(vm->scratch_pd)) {
1244                 ret = PTR_ERR(vm->scratch_pd);
1245                 goto free_pt;
1246         }
1247
1248         if (use_4lvl(vm)) {
1249                 vm->scratch_pdp = alloc_pdp(vm);
1250                 if (IS_ERR(vm->scratch_pdp)) {
1251                         ret = PTR_ERR(vm->scratch_pdp);
1252                         goto free_pd;
1253                 }
1254         }
1255
1256         gen8_initialize_pt(vm, vm->scratch_pt);
1257         gen8_initialize_pd(vm, vm->scratch_pd);
1258         if (use_4lvl(vm))
1259                 gen8_initialize_pdp(vm, vm->scratch_pdp);
1260
1261         return 0;
1262
1263 free_pd:
1264         free_pd(vm, vm->scratch_pd);
1265 free_pt:
1266         free_pt(vm, vm->scratch_pt);
1267 free_scratch_page:
1268         cleanup_scratch_page(vm);
1269
1270         return ret;
1271 }
1272
1273 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1274 {
1275         struct i915_address_space *vm = &ppgtt->base;
1276         struct drm_i915_private *dev_priv = vm->i915;
1277         enum vgt_g2v_type msg;
1278         int i;
1279
1280         if (use_4lvl(vm)) {
1281                 const u64 daddr = px_dma(&ppgtt->pml4);
1282
1283                 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1284                 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1285
1286                 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1287                                 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1288         } else {
1289                 for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1290                         const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1291
1292                         I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1293                         I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1294                 }
1295
1296                 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1297                                 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1298         }
1299
1300         I915_WRITE(vgtif_reg(g2v_notify), msg);
1301
1302         return 0;
1303 }
1304
1305 static void gen8_free_scratch(struct i915_address_space *vm)
1306 {
1307         if (use_4lvl(vm))
1308                 free_pdp(vm, vm->scratch_pdp);
1309         free_pd(vm, vm->scratch_pd);
1310         free_pt(vm, vm->scratch_pt);
1311         cleanup_scratch_page(vm);
1312 }
1313
1314 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1315                                     struct i915_page_directory_pointer *pdp)
1316 {
1317         const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1318         int i;
1319
1320         for (i = 0; i < pdpes; i++) {
1321                 if (pdp->page_directory[i] == vm->scratch_pd)
1322                         continue;
1323
1324                 gen8_free_page_tables(vm, pdp->page_directory[i]);
1325                 free_pd(vm, pdp->page_directory[i]);
1326         }
1327
1328         free_pdp(vm, pdp);
1329 }
1330
1331 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1332 {
1333         int i;
1334
1335         for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1336                 if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
1337                         continue;
1338
1339                 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
1340         }
1341
1342         cleanup_px(&ppgtt->base, &ppgtt->pml4);
1343 }
1344
1345 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1346 {
1347         struct drm_i915_private *dev_priv = vm->i915;
1348         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1349
1350         if (intel_vgpu_active(dev_priv))
1351                 gen8_ppgtt_notify_vgt(ppgtt, false);
1352
1353         if (use_4lvl(vm))
1354                 gen8_ppgtt_cleanup_4lvl(ppgtt);
1355         else
1356                 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
1357
1358         gen8_free_scratch(vm);
1359 }
1360
1361 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1362                                struct i915_page_directory *pd,
1363                                u64 start, u64 length)
1364 {
1365         struct i915_page_table *pt;
1366         u64 from = start;
1367         unsigned int pde;
1368
1369         gen8_for_each_pde(pt, pd, start, length, pde) {
1370                 int count = gen8_pte_count(start, length);
1371
1372                 if (pt == vm->scratch_pt) {
1373                         pd->used_pdes++;
1374
1375                         pt = alloc_pt(vm);
1376                         if (IS_ERR(pt)) {
1377                                 pd->used_pdes--;
1378                                 goto unwind;
1379                         }
1380
1381                         if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1382                                 gen8_initialize_pt(vm, pt);
1383
1384                         gen8_ppgtt_set_pde(vm, pd, pt, pde);
1385                         GEM_BUG_ON(pd->used_pdes > I915_PDES);
1386                 }
1387
1388                 pt->used_ptes += count;
1389         }
1390         return 0;
1391
1392 unwind:
1393         gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1394         return -ENOMEM;
1395 }
1396
1397 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1398                                 struct i915_page_directory_pointer *pdp,
1399                                 u64 start, u64 length)
1400 {
1401         struct i915_page_directory *pd;
1402         u64 from = start;
1403         unsigned int pdpe;
1404         int ret;
1405
1406         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1407                 if (pd == vm->scratch_pd) {
1408                         pdp->used_pdpes++;
1409
1410                         pd = alloc_pd(vm);
1411                         if (IS_ERR(pd)) {
1412                                 pdp->used_pdpes--;
1413                                 goto unwind;
1414                         }
1415
1416                         gen8_initialize_pd(vm, pd);
1417                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1418                         GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
1419
1420                         mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
1421                 }
1422
1423                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1424                 if (unlikely(ret))
1425                         goto unwind_pd;
1426         }
1427
1428         return 0;
1429
1430 unwind_pd:
1431         if (!pd->used_pdes) {
1432                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1433                 GEM_BUG_ON(!pdp->used_pdpes);
1434                 pdp->used_pdpes--;
1435                 free_pd(vm, pd);
1436         }
1437 unwind:
1438         gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1439         return -ENOMEM;
1440 }
1441
1442 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1443                                  u64 start, u64 length)
1444 {
1445         return gen8_ppgtt_alloc_pdp(vm,
1446                                     &i915_vm_to_ppgtt(vm)->pdp, start, length);
1447 }
1448
1449 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1450                                  u64 start, u64 length)
1451 {
1452         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1453         struct i915_pml4 *pml4 = &ppgtt->pml4;
1454         struct i915_page_directory_pointer *pdp;
1455         u64 from = start;
1456         u32 pml4e;
1457         int ret;
1458
1459         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1460                 if (pml4->pdps[pml4e] == vm->scratch_pdp) {
1461                         pdp = alloc_pdp(vm);
1462                         if (IS_ERR(pdp))
1463                                 goto unwind;
1464
1465                         gen8_initialize_pdp(vm, pdp);
1466                         gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1467                 }
1468
1469                 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1470                 if (unlikely(ret))
1471                         goto unwind_pdp;
1472         }
1473
1474         return 0;
1475
1476 unwind_pdp:
1477         if (!pdp->used_pdpes) {
1478                 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1479                 free_pdp(vm, pdp);
1480         }
1481 unwind:
1482         gen8_ppgtt_clear_4lvl(vm, from, start - from);
1483         return -ENOMEM;
1484 }
1485
1486 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
1487                           struct i915_page_directory_pointer *pdp,
1488                           u64 start, u64 length,
1489                           gen8_pte_t scratch_pte,
1490                           struct seq_file *m)
1491 {
1492         struct i915_address_space *vm = &ppgtt->base;
1493         struct i915_page_directory *pd;
1494         u32 pdpe;
1495
1496         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1497                 struct i915_page_table *pt;
1498                 u64 pd_len = length;
1499                 u64 pd_start = start;
1500                 u32 pde;
1501
1502                 if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
1503                         continue;
1504
1505                 seq_printf(m, "\tPDPE #%d\n", pdpe);
1506                 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1507                         u32 pte;
1508                         gen8_pte_t *pt_vaddr;
1509
1510                         if (pd->page_table[pde] == ppgtt->base.scratch_pt)
1511                                 continue;
1512
1513                         pt_vaddr = kmap_atomic_px(pt);
1514                         for (pte = 0; pte < GEN8_PTES; pte += 4) {
1515                                 u64 va = (pdpe << GEN8_PDPE_SHIFT |
1516                                           pde << GEN8_PDE_SHIFT |
1517                                           pte << GEN8_PTE_SHIFT);
1518                                 int i;
1519                                 bool found = false;
1520
1521                                 for (i = 0; i < 4; i++)
1522                                         if (pt_vaddr[pte + i] != scratch_pte)
1523                                                 found = true;
1524                                 if (!found)
1525                                         continue;
1526
1527                                 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1528                                 for (i = 0; i < 4; i++) {
1529                                         if (pt_vaddr[pte + i] != scratch_pte)
1530                                                 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1531                                         else
1532                                                 seq_puts(m, "  SCRATCH ");
1533                                 }
1534                                 seq_puts(m, "\n");
1535                         }
1536                         kunmap_atomic(pt_vaddr);
1537                 }
1538         }
1539 }
1540
1541 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1542 {
1543         struct i915_address_space *vm = &ppgtt->base;
1544         const gen8_pte_t scratch_pte =
1545                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
1546         u64 start = 0, length = ppgtt->base.total;
1547
1548         if (use_4lvl(vm)) {
1549                 u64 pml4e;
1550                 struct i915_pml4 *pml4 = &ppgtt->pml4;
1551                 struct i915_page_directory_pointer *pdp;
1552
1553                 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1554                         if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
1555                                 continue;
1556
1557                         seq_printf(m, "    PML4E #%llu\n", pml4e);
1558                         gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1559                 }
1560         } else {
1561                 gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1562         }
1563 }
1564
1565 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1566 {
1567         struct i915_address_space *vm = &ppgtt->base;
1568         struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1569         struct i915_page_directory *pd;
1570         u64 start = 0, length = ppgtt->base.total;
1571         u64 from = start;
1572         unsigned int pdpe;
1573
1574         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1575                 pd = alloc_pd(vm);
1576                 if (IS_ERR(pd))
1577                         goto unwind;
1578
1579                 gen8_initialize_pd(vm, pd);
1580                 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1581                 pdp->used_pdpes++;
1582         }
1583
1584         pdp->used_pdpes++; /* never remove */
1585         return 0;
1586
1587 unwind:
1588         start -= from;
1589         gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1590                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1591                 free_pd(vm, pd);
1592         }
1593         pdp->used_pdpes = 0;
1594         return -ENOMEM;
1595 }
1596
1597 /*
1598  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1599  * with a net effect resembling a 2-level page table in normal x86 terms. Each
1600  * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1601  * space.
1602  *
1603  */
1604 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1605 {
1606         struct i915_address_space *vm = &ppgtt->base;
1607         struct drm_i915_private *dev_priv = vm->i915;
1608         int ret;
1609
1610         ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
1611                 1ULL << 48 :
1612                 1ULL << 32;
1613
1614         /* There are only few exceptions for gen >=6. chv and bxt.
1615          * And we are not sure about the latter so play safe for now.
1616          */
1617         if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
1618                 ppgtt->base.pt_kmap_wc = true;
1619
1620         ret = gen8_init_scratch(&ppgtt->base);
1621         if (ret) {
1622                 ppgtt->base.total = 0;
1623                 return ret;
1624         }
1625
1626         if (use_4lvl(vm)) {
1627                 ret = setup_px(&ppgtt->base, &ppgtt->pml4);
1628                 if (ret)
1629                         goto free_scratch;
1630
1631                 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1632
1633                 ppgtt->switch_mm = gen8_mm_switch_4lvl;
1634                 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1635                 ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
1636                 ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
1637         } else {
1638                 ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
1639                 if (ret)
1640                         goto free_scratch;
1641
1642                 if (intel_vgpu_active(dev_priv)) {
1643                         ret = gen8_preallocate_top_level_pdp(ppgtt);
1644                         if (ret) {
1645                                 __pdp_fini(&ppgtt->pdp);
1646                                 goto free_scratch;
1647                         }
1648                 }
1649
1650                 ppgtt->switch_mm = gen8_mm_switch_3lvl;
1651                 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1652                 ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
1653                 ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
1654         }
1655
1656         if (intel_vgpu_active(dev_priv))
1657                 gen8_ppgtt_notify_vgt(ppgtt, true);
1658
1659         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1660         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1661         ppgtt->base.bind_vma = ppgtt_bind_vma;
1662         ppgtt->base.set_pages = ppgtt_set_pages;
1663         ppgtt->base.clear_pages = clear_pages;
1664         ppgtt->debug_dump = gen8_dump_ppgtt;
1665
1666         return 0;
1667
1668 free_scratch:
1669         gen8_free_scratch(&ppgtt->base);
1670         return ret;
1671 }
1672
1673 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1674 {
1675         struct i915_address_space *vm = &ppgtt->base;
1676         struct i915_page_table *unused;
1677         gen6_pte_t scratch_pte;
1678         u32 pd_entry, pte, pde;
1679         u32 start = 0, length = ppgtt->base.total;
1680
1681         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1682                                      I915_CACHE_LLC, 0);
1683
1684         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1685                 u32 expected;
1686                 gen6_pte_t *pt_vaddr;
1687                 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1688                 pd_entry = readl(ppgtt->pd_addr + pde);
1689                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1690
1691                 if (pd_entry != expected)
1692                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1693                                    pde,
1694                                    pd_entry,
1695                                    expected);
1696                 seq_printf(m, "\tPDE: %x\n", pd_entry);
1697
1698                 pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
1699
1700                 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1701                         unsigned long va =
1702                                 (pde * PAGE_SIZE * GEN6_PTES) +
1703                                 (pte * PAGE_SIZE);
1704                         int i;
1705                         bool found = false;
1706                         for (i = 0; i < 4; i++)
1707                                 if (pt_vaddr[pte + i] != scratch_pte)
1708                                         found = true;
1709                         if (!found)
1710                                 continue;
1711
1712                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1713                         for (i = 0; i < 4; i++) {
1714                                 if (pt_vaddr[pte + i] != scratch_pte)
1715                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1716                                 else
1717                                         seq_puts(m, "  SCRATCH ");
1718                         }
1719                         seq_puts(m, "\n");
1720                 }
1721                 kunmap_atomic(pt_vaddr);
1722         }
1723 }
1724
/*
 * Point page-directory slot @pde of @ppgtt at page table @pt.
 *
 * The value written is the GEN6 PDE encoding of @pt's DMA address with the
 * valid bit set; it is stored at ppgtt->pd_addr + @pde.
 */
static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
                                  const unsigned int pde,
                                  const struct i915_page_table *pt)
{
        /*
         * A relaxed write is used here, so the caller needs to make sure
         * the write completes if necessary.
         */
        writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
                       ppgtt->pd_addr + pde);
}
1734
1735 /* Write all the page tables found in the ppgtt structure to incrementing page
1736  * directories. */
1737 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
1738                                   u32 start, u32 length)
1739 {
1740         struct i915_page_table *pt;
1741         unsigned int pde;
1742
1743         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
1744                 gen6_write_pde(ppgtt, pde, pt);
1745
1746         mark_tlbs_dirty(ppgtt);
1747         wmb();
1748 }
1749
1750 static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1751 {
1752         GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1753         return ppgtt->pd.base.ggtt_offset << 10;
1754 }
1755
1756 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1757                          struct i915_request *rq)
1758 {
1759         struct intel_engine_cs *engine = rq->engine;
1760         u32 *cs;
1761
1762         /* NB: TLBs must be flushed and invalidated before a switch */
1763         cs = intel_ring_begin(rq, 6);
1764         if (IS_ERR(cs))
1765                 return PTR_ERR(cs);
1766
1767         *cs++ = MI_LOAD_REGISTER_IMM(2);
1768         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1769         *cs++ = PP_DIR_DCLV_2G;
1770         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1771         *cs++ = get_pd_offset(ppgtt);
1772         *cs++ = MI_NOOP;
1773         intel_ring_advance(rq, cs);
1774
1775         return 0;
1776 }
1777
1778 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1779                           struct i915_request *rq)
1780 {
1781         struct intel_engine_cs *engine = rq->engine;
1782         u32 *cs;
1783
1784         /* NB: TLBs must be flushed and invalidated before a switch */
1785         cs = intel_ring_begin(rq, 6);
1786         if (IS_ERR(cs))
1787                 return PTR_ERR(cs);
1788
1789         *cs++ = MI_LOAD_REGISTER_IMM(2);
1790         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1791         *cs++ = PP_DIR_DCLV_2G;
1792         *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1793         *cs++ = get_pd_offset(ppgtt);
1794         *cs++ = MI_NOOP;
1795         intel_ring_advance(rq, cs);
1796
1797         return 0;
1798 }
1799
/*
 * mm switch that programs the page-directory registers of the request's
 * engine directly through MMIO rather than emitting commands into the ring.
 * gen6_ppgtt_init() selects it for gen6 and whenever a vGPU is active.
 *
 * Always returns 0.
 */
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
                          struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        /* NOTE(review): presumably consumed implicitly by I915_WRITE() - confirm. */
        struct drm_i915_private *dev_priv = rq->i915;

        I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
        I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
        return 0;
}
1810
1811 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1812 {
1813         struct intel_engine_cs *engine;
1814         enum intel_engine_id id;
1815
1816         for_each_engine(engine, dev_priv, id) {
1817                 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1818                                  GEN8_GFX_PPGTT_48B : 0;
1819                 I915_WRITE(RING_MODE_GEN7(engine),
1820                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1821         }
1822 }
1823
1824 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1825 {
1826         struct intel_engine_cs *engine;
1827         u32 ecochk, ecobits;
1828         enum intel_engine_id id;
1829
1830         ecobits = I915_READ(GAC_ECO_BITS);
1831         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1832
1833         ecochk = I915_READ(GAM_ECOCHK);
1834         if (IS_HASWELL(dev_priv)) {
1835                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1836         } else {
1837                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1838                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1839         }
1840         I915_WRITE(GAM_ECOCHK, ecochk);
1841
1842         for_each_engine(engine, dev_priv, id) {
1843                 /* GFX_MODE is per-ring on gen7+ */
1844                 I915_WRITE(RING_MODE_GEN7(engine),
1845                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1846         }
1847 }
1848
1849 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1850 {
1851         u32 ecochk, gab_ctl, ecobits;
1852
1853         ecobits = I915_READ(GAC_ECO_BITS);
1854         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1855                    ECOBITS_PPGTT_CACHE64B);
1856
1857         gab_ctl = I915_READ(GAB_CTL);
1858         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1859
1860         ecochk = I915_READ(GAM_ECOCHK);
1861         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1862
1863         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1864 }
1865
1866 /* PPGTT support for Sandybdrige/Gen6 and later */
1867 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1868                                    u64 start, u64 length)
1869 {
1870         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1871         unsigned int first_entry = start >> PAGE_SHIFT;
1872         unsigned int pde = first_entry / GEN6_PTES;
1873         unsigned int pte = first_entry % GEN6_PTES;
1874         unsigned int num_entries = length >> PAGE_SHIFT;
1875         gen6_pte_t scratch_pte =
1876                 vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1877
1878         while (num_entries) {
1879                 struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
1880                 unsigned int end = min(pte + num_entries, GEN6_PTES);
1881                 gen6_pte_t *vaddr;
1882
1883                 num_entries -= end - pte;
1884
1885                 /* Note that the hw doesn't support removing PDE on the fly
1886                  * (they are cached inside the context with no means to
1887                  * invalidate the cache), so we can only reset the PTE
1888                  * entries back to scratch.
1889                  */
1890
1891                 vaddr = kmap_atomic_px(pt);
1892                 do {
1893                         vaddr[pte++] = scratch_pte;
1894                 } while (pte < end);
1895                 kunmap_atomic(vaddr);
1896
1897                 pte = 0;
1898         }
1899 }
1900
/*
 * Write the PTEs for @vma's backing pages into the gen6 ppgtt @vm.
 *
 * Starting at the PTE that corresponds to vma->node.start, one PTE is
 * written per PAGE_SIZE step of each DMA segment produced by sgt_dma(@vma),
 * moving on to the next page table whenever the current one is full.
 * @cache_level and @flags are folded into every PTE via vm->pte_encode().
 */
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma *vma,
                                      enum i915_cache_level cache_level,
                                      u32 flags)
{
        struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
        unsigned first_entry = vma->node.start >> PAGE_SHIFT;
        unsigned act_pt = first_entry / GEN6_PTES;
        unsigned act_pte = first_entry % GEN6_PTES;
        /* Encode the attribute bits once; the address is OR'ed in per page. */
        const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
        struct sgt_dma iter = sgt_dma(vma);
        gen6_pte_t *vaddr;

        vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
        do {
                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

                iter.dma += PAGE_SIZE;
                if (iter.dma == iter.max) {
                        /* Segment exhausted: advance, or stop after the last one. */
                        iter.sg = __sg_next(iter.sg);
                        if (!iter.sg)
                                break;

                        iter.dma = sg_dma_address(iter.sg);
                        iter.max = iter.dma + iter.sg->length;
                }

                /* Page table full: map the next one and restart at PTE 0. */
                if (++act_pte == GEN6_PTES) {
                        kunmap_atomic(vaddr);
                        vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
                        act_pte = 0;
                }
        } while (1);
        kunmap_atomic(vaddr);

        /* Every entry written above maps a single I915_GTT_PAGE_SIZE page. */
        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
1938
1939 static int gen6_alloc_va_range(struct i915_address_space *vm,
1940                                u64 start, u64 length)
1941 {
1942         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1943         struct i915_page_table *pt;
1944         u64 from = start;
1945         unsigned int pde;
1946         bool flush = false;
1947
1948         gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1949                 if (pt == vm->scratch_pt) {
1950                         pt = alloc_pt(vm);
1951                         if (IS_ERR(pt))
1952                                 goto unwind_out;
1953
1954                         gen6_initialize_pt(vm, pt);
1955                         ppgtt->pd.page_table[pde] = pt;
1956                         gen6_write_pde(ppgtt, pde, pt);
1957                         flush = true;
1958                 }
1959         }
1960
1961         if (flush) {
1962                 mark_tlbs_dirty(ppgtt);
1963                 wmb();
1964         }
1965
1966         return 0;
1967
1968 unwind_out:
1969         gen6_ppgtt_clear_range(vm, from, start);
1970         return -ENOMEM;
1971 }
1972
1973 static int gen6_init_scratch(struct i915_address_space *vm)
1974 {
1975         int ret;
1976
1977         ret = setup_scratch_page(vm, I915_GFP_DMA);
1978         if (ret)
1979                 return ret;
1980
1981         vm->scratch_pt = alloc_pt(vm);
1982         if (IS_ERR(vm->scratch_pt)) {
1983                 cleanup_scratch_page(vm);
1984                 return PTR_ERR(vm->scratch_pt);
1985         }
1986
1987         gen6_initialize_pt(vm, vm->scratch_pt);
1988
1989         return 0;
1990 }
1991
/*
 * Release what gen6_init_scratch() set up for @vm: the scratch page table
 * first, then the scratch page.
 */
static void gen6_free_scratch(struct i915_address_space *vm)
{
        free_pt(vm, vm->scratch_pt);
        cleanup_scratch_page(vm);
}
1997
1998 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1999 {
2000         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
2001         struct i915_page_directory *pd = &ppgtt->pd;
2002         struct i915_page_table *pt;
2003         u32 pde;
2004
2005         drm_mm_remove_node(&ppgtt->node);
2006
2007         gen6_for_all_pdes(pt, pd, pde)
2008                 if (pt != vm->scratch_pt)
2009                         free_pt(vm, pt);
2010
2011         gen6_free_scratch(vm);
2012 }
2013
2014 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2015 {
2016         struct i915_address_space *vm = &ppgtt->base;
2017         struct drm_i915_private *dev_priv = ppgtt->base.i915;
2018         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2019         int ret;
2020
2021         /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
2022          * allocator works in address space sizes, so it's multiplied by page
2023          * size. We allocate at the top of the GTT to avoid fragmentation.
2024          */
2025         BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
2026
2027         ret = gen6_init_scratch(vm);
2028         if (ret)
2029                 return ret;
2030
2031         ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
2032                                   GEN6_PD_SIZE, GEN6_PD_ALIGN,
2033                                   I915_COLOR_UNEVICTABLE,
2034                                   0, ggtt->base.total,
2035                                   PIN_HIGH);
2036         if (ret)
2037                 goto err_out;
2038
2039         if (ppgtt->node.start < ggtt->mappable_end)
2040                 DRM_DEBUG("Forced to use aperture for PDEs\n");
2041
2042         ppgtt->pd.base.ggtt_offset =
2043                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2044
2045         ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2046                 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2047
2048         return 0;
2049
2050 err_out:
2051         gen6_free_scratch(vm);
2052         return ret;
2053 }
2054
/*
 * Thin wrapper: all allocation for a gen6 ppgtt is done by
 * gen6_ppgtt_allocate_page_directories(), whose result is returned as is.
 */
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
        return gen6_ppgtt_allocate_page_directories(ppgtt);
}
2059
2060 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2061                                   u64 start, u64 length)
2062 {
2063         struct i915_page_table *unused;
2064         u32 pde;
2065
2066         gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
2067                 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2068 }
2069
2070 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2071 {
2072         struct drm_i915_private *dev_priv = ppgtt->base.i915;
2073         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2074         int ret;
2075
2076         ppgtt->base.pte_encode = ggtt->base.pte_encode;
2077         if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
2078                 ppgtt->switch_mm = gen6_mm_switch;
2079         else if (IS_HASWELL(dev_priv))
2080                 ppgtt->switch_mm = hsw_mm_switch;
2081         else if (IS_GEN7(dev_priv))
2082                 ppgtt->switch_mm = gen7_mm_switch;
2083         else
2084                 BUG();
2085
2086         ret = gen6_ppgtt_alloc(ppgtt);
2087         if (ret)
2088                 return ret;
2089
2090         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2091
2092         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2093         gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
2094
2095         ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
2096         if (ret) {
2097                 gen6_ppgtt_cleanup(&ppgtt->base);
2098                 return ret;
2099         }
2100
2101         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2102         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2103         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2104         ppgtt->base.bind_vma = ppgtt_bind_vma;
2105         ppgtt->base.set_pages = ppgtt_set_pages;
2106         ppgtt->base.clear_pages = clear_pages;
2107         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2108         ppgtt->debug_dump = gen6_dump_ppgtt;
2109
2110         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2111                          ppgtt->node.size >> 20,
2112                          ppgtt->node.start / PAGE_SIZE);
2113
2114         DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
2115                          ppgtt->pd.base.ggtt_offset << 10);
2116
2117         return 0;
2118 }
2119
2120 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2121                            struct drm_i915_private *dev_priv)
2122 {
2123         ppgtt->base.i915 = dev_priv;
2124         ppgtt->base.dma = &dev_priv->drm.pdev->dev;
2125
2126         if (INTEL_GEN(dev_priv) < 8)
2127                 return gen6_ppgtt_init(ppgtt);
2128         else
2129                 return gen8_ppgtt_init(ppgtt);
2130 }
2131
/*
 * Common initialisation of an address space: set up its drm_mm range
 * manager over [0, vm->total), initialise the vma lists and the free page
 * pagevec, and link the vm into dev_priv->vm_list.
 *
 * vm->total must already be set by the caller. @name is not used in this
 * function.
 */
static void i915_address_space_init(struct i915_address_space *vm,
                                    struct drm_i915_private *dev_priv,
                                    const char *name)
{
        drm_mm_init(&vm->mm, 0, vm->total);
        vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

        INIT_LIST_HEAD(&vm->active_list);
        INIT_LIST_HEAD(&vm->inactive_list);
        INIT_LIST_HEAD(&vm->unbound_list);

        list_add_tail(&vm->global_link, &dev_priv->vm_list);
        pagevec_init(&vm->free_pages);
}
2146
/*
 * Counterpart of i915_address_space_init(): release any pages still held
 * in the vm's free page pagevec, take down the drm_mm range manager and
 * unlink the vm from the global list.
 */
static void i915_address_space_fini(struct i915_address_space *vm)
{
        /* Only call into the release path if something is actually cached. */
        if (pagevec_count(&vm->free_pages))
                vm_free_pages_release(vm, true);

        drm_mm_takedown(&vm->mm);
        list_del(&vm->global_link);
}
2155
/*
 * Apply the GTT related register workarounds for @dev_priv: the platform
 * specific default for GEN8_L3_LRA_1_GPGPU and, where 64K GTT pages are
 * supported on gen10 or earlier, the IPS enable bit in
 * GEN8_GAMW_ECO_DEV_RW_IA.
 */
static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
{
        /* This function is for gtt related workarounds. This function is
         * called on driver load and after a GPU reset, so you can place
         * workarounds here even if they get overwritten by GPU reset.
         */
        /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
        if (IS_BROADWELL(dev_priv))
                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
        else if (IS_CHERRYVIEW(dev_priv))
                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
        else if (IS_GEN9_LP(dev_priv))
                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
        else if (INTEL_GEN(dev_priv) >= 9)
                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

        /*
         * To support 64K PTEs we need to first enable the use of the
         * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
         * mmio, otherwise the page-walker will simply ignore the IPS bit. This
         * shouldn't be needed after GEN10.
         *
         * 64K pages were first introduced from BDW+, although technically they
         * only *work* from gen9+. For pre-BDW we instead have the option for
         * 32K pages, but we don't currently have any support for it in our
         * driver.
         */
        if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
            INTEL_GEN(dev_priv) <= 10)
                I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
                           I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
                           GAMW_ECO_ENABLE_64K_IPS_FIELD);
}
2189
/*
 * Apply the GTT workarounds and, when the legacy (non-execlists) ppgtt is
 * in use, run the generation specific ppgtt enable routine.
 *
 * Always returns 0.
 */
int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
{
        gtt_write_workarounds(dev_priv);

        /*
         * With execlists, PPGTT is enabled by the context descriptor and
         * the PDPs are contained within the context itself, so there is
         * nothing to do here. The same holds when PPGTT is not used at all.
         */
        if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) || !USES_PPGTT(dev_priv))
                return 0;

        if (IS_GEN6(dev_priv)) {
                gen6_ppgtt_enable(dev_priv);
        } else if (IS_GEN7(dev_priv)) {
                gen7_ppgtt_enable(dev_priv);
        } else if (INTEL_GEN(dev_priv) >= 8) {
                gen8_ppgtt_enable(dev_priv);
        } else {
                MISSING_CASE(INTEL_GEN(dev_priv));
        }

        return 0;
}
2214
2215 struct i915_hw_ppgtt *
2216 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2217                   struct drm_i915_file_private *fpriv,
2218                   const char *name)
2219 {
2220         struct i915_hw_ppgtt *ppgtt;
2221         int ret;
2222
2223         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2224         if (!ppgtt)
2225                 return ERR_PTR(-ENOMEM);
2226
2227         ret = __hw_ppgtt_init(ppgtt, dev_priv);
2228         if (ret) {
2229                 kfree(ppgtt);
2230                 return ERR_PTR(ret);
2231         }
2232
2233         kref_init(&ppgtt->ref);
2234         i915_address_space_init(&ppgtt->base, dev_priv, name);
2235         ppgtt->base.file = fpriv;
2236
2237         trace_i915_ppgtt_create(&ppgtt->base);
2238
2239         return ppgtt;
2240 }
2241
/*
 * Mark @vm as closed. Closing an already closed vm is treated as a bug
 * (GEM_BUG_ON).
 */
void i915_ppgtt_close(struct i915_address_space *vm)
{
        GEM_BUG_ON(vm->closed);
        vm->closed = true;
}
2247
2248 static void ppgtt_destroy_vma(struct i915_address_space *vm)
2249 {
2250         struct list_head *phases[] = {
2251                 &vm->active_list,
2252                 &vm->inactive_list,
2253                 &vm->unbound_list,
2254                 NULL,
2255         }, **phase;
2256
2257         vm->closed = true;
2258         for (phase = phases; *phase; phase++) {
2259                 struct i915_vma *vma, *vn;
2260
2261                 list_for_each_entry_safe(vma, vn, *phase, vm_link)
2262                         i915_vma_destroy(vma);
2263         }
2264 }
2265
/*
 * kref release callback for a ppgtt: destroy all of its vmas, run the
 * ppgtt specific cleanup hook, tear down the common address space state
 * and free the structure.
 */
void i915_ppgtt_release(struct kref *kref)
{
        struct i915_hw_ppgtt *ppgtt =
                container_of(kref, struct i915_hw_ppgtt, ref);

        trace_i915_ppgtt_release(&ppgtt->base);

        ppgtt_destroy_vma(&ppgtt->base);

        /* All three vma lists must have been emptied by the call above. */
        GEM_BUG_ON(!list_empty(&ppgtt->base.active_list));
        GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list));
        GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list));

        ppgtt->base.cleanup(&ppgtt->base);
        i915_address_space_fini(&ppgtt->base);
        kfree(ppgtt);
}
2283
2284 /* Certain Gen5 chipsets require require idling the GPU before
2285  * unmapping anything from the GTT when VT-d is enabled.
2286  */
2287 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2288 {
2289         /* Query intel_iommu to see if we need the workaround. Presumably that
2290          * was loaded first.
2291          */
2292         return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2293 }
2294
2295 static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv)
2296 {
2297         struct intel_engine_cs *engine;
2298         enum intel_engine_id id;
2299         u32 fault;
2300
2301         for_each_engine(engine, dev_priv, id) {
2302                 fault = I915_READ(RING_FAULT_REG(engine));
2303                 if (fault & RING_FAULT_VALID) {
2304                         DRM_DEBUG_DRIVER("Unexpected fault\n"
2305                                          "\tAddr: 0x%08lx\n"
2306                                          "\tAddress space: %s\n"
2307                                          "\tSource ID: %d\n"
2308                                          "\tType: %d\n",
2309                                          fault & PAGE_MASK,
2310                                          fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2311                                          RING_FAULT_SRCID(fault),
2312                                          RING_FAULT_FAULT_TYPE(fault));
2313                         I915_WRITE(RING_FAULT_REG(engine),
2314                                    fault & ~RING_FAULT_VALID);
2315                 }
2316         }
2317
2318         POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2319 }
2320
2321 static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv)
2322 {
2323         u32 fault = I915_READ(GEN8_RING_FAULT_REG);
2324
2325         if (fault & RING_FAULT_VALID) {
2326                 u32 fault_data0, fault_data1;
2327                 u64 fault_addr;
2328
2329                 fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
2330                 fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
2331                 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
2332                              ((u64)fault_data0 << 12);
2333
2334                 DRM_DEBUG_DRIVER("Unexpected fault\n"
2335                                  "\tAddr: 0x%08x_%08x\n"
2336                                  "\tAddress space: %s\n"
2337                                  "\tEngine ID: %d\n"
2338                                  "\tSource ID: %d\n"
2339                                  "\tType: %d\n",
2340                                  upper_32_bits(fault_addr),
2341                                  lower_32_bits(fault_addr),
2342                                  fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
2343                                  GEN8_RING_FAULT_ENGINE_ID(fault),
2344                                  RING_FAULT_SRCID(fault),
2345                                  RING_FAULT_FAULT_TYPE(fault));
2346                 I915_WRITE(GEN8_RING_FAULT_REG,
2347                            fault & ~RING_FAULT_VALID);
2348         }
2349
2350         POSTING_READ(GEN8_RING_FAULT_REG);
2351 }
2352
/*
 * Dispatch to the generation specific fault check. Nothing is done before
 * gen6.
 */
void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
{
        /* From GEN8 onwards we only have one 'All Engine Fault Register' */
        if (INTEL_GEN(dev_priv) >= 8) {
                gen8_check_and_clear_faults(dev_priv);
                return;
        }

        if (INTEL_GEN(dev_priv) >= 6)
                gen6_check_and_clear_faults(dev_priv);
}
2363
/*
 * On gen6+, check and clear any pending faults, clear the whole GGTT range
 * through its clear_range hook and then invalidate the GGTT. Does nothing
 * on older generations.
 */
void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
{
        struct i915_ggtt *ggtt = &dev_priv->ggtt;

        /* Don't bother messing with faults pre GEN6 as we have little
         * documentation supporting that it's a good idea.
         */
        if (INTEL_GEN(dev_priv) < 6)
                return;

        i915_check_and_clear_faults(dev_priv);

        /* Clear the complete address space, from 0 up to ggtt->base.total. */
        ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);

        i915_ggtt_invalidate(dev_priv);
}
2380
2381 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2382                                struct sg_table *pages)
2383 {
2384         do {
2385                 if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2386                                      pages->sgl, pages->nents,
2387                                      PCI_DMA_BIDIRECTIONAL,
2388                                      DMA_ATTR_NO_WARN))
2389                         return 0;
2390
2391                 /* If the DMA remap fails, one cause can be that we have
2392                  * too many objects pinned in a small remapping table,
2393                  * such as swiotlb. Incrementally purge all other objects and
2394                  * try again - if there are no more pages to remove from
2395                  * the DMA remapper, i915_gem_shrink will return 0.
2396                  */
2397                 GEM_BUG_ON(obj->mm.pages == pages);
2398         } while (i915_gem_shrink(to_i915(obj->base.dev),
2399                                  obj->base.size >> PAGE_SHIFT, NULL,
2400                                  I915_SHRINK_BOUND |
2401                                  I915_SHRINK_UNBOUND |
2402                                  I915_SHRINK_ACTIVE));
2403
2404         return -ENOSPC;
2405 }
2406
/* Store the gen8 PTE value @pte at the I/O address @addr using writeq(). */
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
        writeq(pte, addr);
}
2411
2412 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2413                                   dma_addr_t addr,
2414                                   u64 offset,
2415                                   enum i915_cache_level level,
2416                                   u32 unused)
2417 {
2418         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2419         gen8_pte_t __iomem *pte =
2420                 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2421
2422         gen8_set_pte(pte, gen8_pte_encode(addr, level));
2423
2424         ggtt->invalidate(vm->i915);
2425 }
2426
2427 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2428                                      struct i915_vma *vma,
2429                                      enum i915_cache_level level,
2430                                      u32 unused)
2431 {
2432         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2433         struct sgt_iter sgt_iter;
2434         gen8_pte_t __iomem *gtt_entries;
2435         const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
2436         dma_addr_t addr;
2437
2438         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2439         gtt_entries += vma->node.start >> PAGE_SHIFT;
2440         for_each_sgt_dma(addr, sgt_iter, vma->pages)
2441                 gen8_set_pte(gtt_entries++, pte_encode | addr);
2442
2443         /*
2444          * We want to flush the TLBs only after we're certain all the PTE
2445          * updates have finished.
2446          */
2447         ggtt->invalidate(vm->i915);
2448 }
2449
2450 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2451                                   dma_addr_t addr,
2452                                   u64 offset,
2453                                   enum i915_cache_level level,
2454                                   u32 flags)
2455 {
2456         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2457         gen6_pte_t __iomem *pte =
2458                 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2459
2460         iowrite32(vm->pte_encode(addr, level, flags), pte);
2461
2462         ggtt->invalidate(vm->i915);
2463 }
2464
2465 /*
2466  * Binds an object into the global gtt with the specified cache level. The object
2467  * will be accessible to the GPU via commands whose operands reference offsets
2468  * within the global GTT as well as accessible by the GPU through the GMADR
2469  * mapped BAR (dev_priv->mm.gtt->gtt).
2470  */
2471 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2472                                      struct i915_vma *vma,
2473                                      enum i915_cache_level level,
2474                                      u32 flags)
2475 {
2476         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2477         gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2478         unsigned int i = vma->node.start >> PAGE_SHIFT;
2479         struct sgt_iter iter;
2480         dma_addr_t addr;
2481         for_each_sgt_dma(addr, iter, vma->pages)
2482                 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2483
2484         /*
2485          * We want to flush the TLBs only after we're certain all the PTE
2486          * updates have finished.
2487          */
2488         ggtt->invalidate(vm->i915);
2489 }
2490
/*
 * Deliberately empty function with the same signature as the other
 * *_clear_range() helpers in this file; all arguments are ignored.
 */
static void nop_clear_range(struct i915_address_space *vm,
                            u64 start, u64 length)
{
}
2495
2496 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2497                                   u64 start, u64 length)
2498 {
2499         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2500         unsigned first_entry = start >> PAGE_SHIFT;
2501         unsigned num_entries = length >> PAGE_SHIFT;
2502         const gen8_pte_t scratch_pte =
2503                 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
2504         gen8_pte_t __iomem *gtt_base =
2505                 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2506         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2507         int i;
2508
2509         if (WARN(num_entries > max_entries,
2510                  "First entry = %d; Num entries = %d (max=%d)\n",
2511                  first_entry, num_entries, max_entries))
2512                 num_entries = max_entries;
2513
2514         for (i = 0; i < num_entries; i++)
2515                 gen8_set_pte(&gtt_base[i], scratch_pte);
2516 }
2517
2518 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2519 {
2520         struct drm_i915_private *dev_priv = vm->i915;
2521
2522         /*
2523          * Make sure the internal GAM fifo has been cleared of all GTT
2524          * writes before exiting stop_machine(). This guarantees that
2525          * any aperture accesses waiting to start in another process
2526          * cannot back up behind the GTT writes causing a hang.
2527          * The register can be any arbitrary GAM register.
2528          */
2529         POSTING_READ(GFX_FLSH_CNTL_GEN6);
2530 }
2531
2532 struct insert_page {
2533         struct i915_address_space *vm;
2534         dma_addr_t addr;
2535         u64 offset;
2536         enum i915_cache_level level;
2537 };
2538
2539 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2540 {
2541         struct insert_page *arg = _arg;
2542
2543         gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2544         bxt_vtd_ggtt_wa(arg->vm);
2545
2546         return 0;
2547 }
2548
2549 static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2550                                           dma_addr_t addr,
2551                                           u64 offset,
2552                                           enum i915_cache_level level,
2553                                           u32 unused)
2554 {
2555         struct insert_page arg = { vm, addr, offset, level };
2556
2557         stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2558 }
2559
2560 struct insert_entries {
2561         struct i915_address_space *vm;
2562         struct i915_vma *vma;
2563         enum i915_cache_level level;
2564 };
2565
2566 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2567 {
2568         struct insert_entries *arg = _arg;
2569
2570         gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
2571         bxt_vtd_ggtt_wa(arg->vm);
2572
2573         return 0;
2574 }
2575
2576 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2577                                              struct i915_vma *vma,
2578                                              enum i915_cache_level level,
2579                                              u32 unused)
2580 {
2581         struct insert_entries arg = { vm, vma, level };
2582
2583         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2584 }
2585
2586 struct clear_range {
2587         struct i915_address_space *vm;
2588         u64 start;
2589         u64 length;
2590 };
2591
2592 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2593 {
2594         struct clear_range *arg = _arg;
2595
2596         gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2597         bxt_vtd_ggtt_wa(arg->vm);
2598
2599         return 0;
2600 }
2601
2602 static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2603                                           u64 start,
2604                                           u64 length)
2605 {
2606         struct clear_range arg = { vm, start, length };
2607
2608         stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2609 }
2610
2611 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2612                                   u64 start, u64 length)
2613 {
2614         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2615         unsigned first_entry = start >> PAGE_SHIFT;
2616         unsigned num_entries = length >> PAGE_SHIFT;
2617         gen6_pte_t scratch_pte, __iomem *gtt_base =
2618                 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2619         const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2620         int i;
2621
2622         if (WARN(num_entries > max_entries,
2623                  "First entry = %d; Num entries = %d (max=%d)\n",
2624                  first_entry, num_entries, max_entries))
2625                 num_entries = max_entries;
2626
2627         scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2628                                      I915_CACHE_LLC, 0);
2629
2630         for (i = 0; i < num_entries; i++)
2631                 iowrite32(scratch_pte, &gtt_base[i]);
2632 }
2633
2634 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2635                                   dma_addr_t addr,
2636                                   u64 offset,
2637                                   enum i915_cache_level cache_level,
2638                                   u32 unused)
2639 {
2640         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2641                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2642
2643         intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2644 }
2645
2646 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2647                                      struct i915_vma *vma,
2648                                      enum i915_cache_level cache_level,
2649                                      u32 unused)
2650 {
2651         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2652                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2653
2654         intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2655                                     flags);
2656 }
2657
2658 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2659                                   u64 start, u64 length)
2660 {
2661         intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2662 }
2663
2664 static int ggtt_bind_vma(struct i915_vma *vma,
2665                          enum i915_cache_level cache_level,
2666                          u32 flags)
2667 {
2668         struct drm_i915_private *i915 = vma->vm->i915;
2669         struct drm_i915_gem_object *obj = vma->obj;
2670         u32 pte_flags;
2671
2672         /* Currently applicable only to VLV */
2673         pte_flags = 0;
2674         if (obj->gt_ro)
2675                 pte_flags |= PTE_READ_ONLY;
2676
2677         intel_runtime_pm_get(i915);
2678         vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2679         intel_runtime_pm_put(i915);
2680
2681         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2682
2683         /*
2684          * Without aliasing PPGTT there's no difference between
2685          * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2686          * upgrade to both bound if we bind either to avoid double-binding.
2687          */
2688         vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2689
2690         return 0;
2691 }
2692
2693 static void ggtt_unbind_vma(struct i915_vma *vma)
2694 {
2695         struct drm_i915_private *i915 = vma->vm->i915;
2696
2697         intel_runtime_pm_get(i915);
2698         vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2699         intel_runtime_pm_put(i915);
2700 }
2701
2702 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2703                                  enum i915_cache_level cache_level,
2704                                  u32 flags)
2705 {
2706         struct drm_i915_private *i915 = vma->vm->i915;
2707         u32 pte_flags;
2708         int ret;
2709
2710         /* Currently applicable only to VLV */
2711         pte_flags = 0;
2712         if (vma->obj->gt_ro)
2713                 pte_flags |= PTE_READ_ONLY;
2714
2715         if (flags & I915_VMA_LOCAL_BIND) {
2716                 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2717
2718                 if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
2719                     appgtt->base.allocate_va_range) {
2720                         ret = appgtt->base.allocate_va_range(&appgtt->base,
2721                                                              vma->node.start,
2722                                                              vma->size);
2723                         if (ret)
2724                                 return ret;
2725                 }
2726
2727                 appgtt->base.insert_entries(&appgtt->base, vma, cache_level,
2728                                             pte_flags);
2729         }
2730
2731         if (flags & I915_VMA_GLOBAL_BIND) {
2732                 intel_runtime_pm_get(i915);
2733                 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2734                 intel_runtime_pm_put(i915);
2735         }
2736
2737         return 0;
2738 }
2739
2740 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2741 {
2742         struct drm_i915_private *i915 = vma->vm->i915;
2743
2744         if (vma->flags & I915_VMA_GLOBAL_BIND) {
2745                 intel_runtime_pm_get(i915);
2746                 vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2747                 intel_runtime_pm_put(i915);
2748         }
2749
2750         if (vma->flags & I915_VMA_LOCAL_BIND) {
2751                 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
2752
2753                 vm->clear_range(vm, vma->node.start, vma->size);
2754         }
2755 }
2756
2757 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2758                                struct sg_table *pages)
2759 {
2760         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2761         struct device *kdev = &dev_priv->drm.pdev->dev;
2762         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2763
2764         if (unlikely(ggtt->do_idle_maps)) {
2765                 if (i915_gem_wait_for_idle(dev_priv, 0)) {
2766                         DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2767                         /* Wait a bit, in hopes it avoids the hang */
2768                         udelay(10);
2769                 }
2770         }
2771
2772         dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2773 }
2774
2775 static int ggtt_set_pages(struct i915_vma *vma)
2776 {
2777         int ret;
2778
2779         GEM_BUG_ON(vma->pages);
2780
2781         ret = i915_get_ggtt_vma_pages(vma);
2782         if (ret)
2783                 return ret;
2784
2785         vma->page_sizes = vma->obj->mm.page_sizes;
2786
2787         return 0;
2788 }
2789
2790 static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2791                                   unsigned long color,
2792                                   u64 *start,
2793                                   u64 *end)
2794 {
2795         if (node->allocated && node->color != color)
2796                 *start += I915_GTT_PAGE_SIZE;
2797
2798         /* Also leave a space between the unallocated reserved node after the
2799          * GTT and any objects within the GTT, i.e. we use the color adjustment
2800          * to insert a guard page to prevent prefetches crossing over the
2801          * GTT boundary.
2802          */
2803         node = list_next_entry(node, node_list);
2804         if (node->color != color)
2805                 *end -= I915_GTT_PAGE_SIZE;
2806 }
2807
2808 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2809 {
2810         struct i915_ggtt *ggtt = &i915->ggtt;
2811         struct i915_hw_ppgtt *ppgtt;
2812         int err;
2813
2814         ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
2815         if (IS_ERR(ppgtt))
2816                 return PTR_ERR(ppgtt);
2817
2818         if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
2819                 err = -ENODEV;
2820                 goto err_ppgtt;
2821         }
2822
2823         if (ppgtt->base.allocate_va_range) {
2824                 /* Note we only pre-allocate as far as the end of the global
2825                  * GTT. On 48b / 4-level page-tables, the difference is very,
2826                  * very significant! We have to preallocate as GVT/vgpu does
2827                  * not like the page directory disappearing.
2828                  */
2829                 err = ppgtt->base.allocate_va_range(&ppgtt->base,
2830                                                     0, ggtt->base.total);
2831                 if (err)
2832                         goto err_ppgtt;
2833         }
2834
2835         i915->mm.aliasing_ppgtt = ppgtt;
2836
2837         GEM_BUG_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2838         ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2839
2840         GEM_BUG_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
2841         ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
2842
2843         return 0;
2844
2845 err_ppgtt:
2846         i915_ppgtt_put(ppgtt);
2847         return err;
2848 }
2849
2850 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2851 {
2852         struct i915_ggtt *ggtt = &i915->ggtt;
2853         struct i915_hw_ppgtt *ppgtt;
2854
2855         ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2856         if (!ppgtt)
2857                 return;
2858
2859         i915_ppgtt_put(ppgtt);
2860
2861         ggtt->base.bind_vma = ggtt_bind_vma;
2862         ggtt->base.unbind_vma = ggtt_unbind_vma;
2863 }
2864
2865 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2866 {
2867         /* Let GEM Manage all of the aperture.
2868          *
2869          * However, leave one page at the end still bound to the scratch page.
2870          * There are a number of places where the hardware apparently prefetches
2871          * past the end of the object, and we've seen multiple hangs with the
2872          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2873          * aperture.  One page should be enough to keep any prefetching inside
2874          * of the aperture.
2875          */
2876         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2877         unsigned long hole_start, hole_end;
2878         struct drm_mm_node *entry;
2879         int ret;
2880
2881         ret = intel_vgt_balloon(dev_priv);
2882         if (ret)
2883                 return ret;
2884
2885         /* Reserve a mappable slot for our lockless error capture */
2886         ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
2887                                           PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2888                                           0, ggtt->mappable_end,
2889                                           DRM_MM_INSERT_LOW);
2890         if (ret)
2891                 return ret;
2892
2893         /* Clear any non-preallocated blocks */
2894         drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2895                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2896                               hole_start, hole_end);
2897                 ggtt->base.clear_range(&ggtt->base, hole_start,
2898                                        hole_end - hole_start);
2899         }
2900
2901         /* And finally clear the reserved guard page */
2902         ggtt->base.clear_range(&ggtt->base,
2903                                ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
2904
2905         if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2906                 ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2907                 if (ret)
2908                         goto err;
2909         }
2910
2911         return 0;
2912
2913 err:
2914         drm_mm_remove_node(&ggtt->error_capture);
2915         return ret;
2916 }
2917
2918 /**
2919  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2920  * @dev_priv: i915 device
2921  */
2922 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2923 {
2924         struct i915_ggtt *ggtt = &dev_priv->ggtt;
2925         struct i915_vma *vma, *vn;
2926         struct pagevec *pvec;
2927
2928         ggtt->base.closed = true;
2929
2930         mutex_lock(&dev_priv->drm.struct_mutex);
2931         GEM_BUG_ON(!list_empty(&ggtt->base.active_list));
2932         list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
2933                 WARN_ON(i915_vma_unbind(vma));
2934         mutex_unlock(&dev_priv->drm.struct_mutex);
2935
2936         i915_gem_cleanup_stolen(&dev_priv->drm);
2937
2938         mutex_lock(&dev_priv->drm.struct_mutex);
2939         i915_gem_fini_aliasing_ppgtt(dev_priv);
2940
2941         if (drm_mm_node_allocated(&ggtt->error_capture))
2942                 drm_mm_remove_node(&ggtt->error_capture);
2943
2944         if (drm_mm_initialized(&ggtt->base.mm)) {
2945                 intel_vgt_deballoon(dev_priv);
2946                 i915_address_space_fini(&ggtt->base);
2947         }
2948
2949         ggtt->base.cleanup(&ggtt->base);
2950
2951         pvec = &dev_priv->mm.wc_stash;
2952         if (pvec->nr) {
2953                 set_pages_array_wb(pvec->pages, pvec->nr);
2954                 __pagevec_release(pvec);
2955         }
2956
2957         mutex_unlock(&dev_priv->drm.struct_mutex);
2958
2959         arch_phys_wc_del(ggtt->mtrr);
2960         io_mapping_fini(&ggtt->iomap);
2961 }
2962
2963 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2964 {
2965         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2966         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2967         return snb_gmch_ctl << 20;
2968 }
2969
2970 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2971 {
2972         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2973         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2974         if (bdw_gmch_ctl)
2975                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2976
2977 #ifdef CONFIG_X86_32
2978         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2979         if (bdw_gmch_ctl > 4)
2980                 bdw_gmch_ctl = 4;
2981 #endif
2982
2983         return bdw_gmch_ctl << 20;
2984 }
2985
2986 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2987 {
2988         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2989         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2990
2991         if (gmch_ctrl)
2992                 return 1 << (20 + gmch_ctrl);
2993
2994         return 0;
2995 }
2996
2997 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2998 {
2999         struct drm_i915_private *dev_priv = ggtt->base.i915;
3000         struct pci_dev *pdev = dev_priv->drm.pdev;
3001         phys_addr_t phys_addr;
3002         int ret;
3003
3004         /* For Modern GENs the PTEs and register space are split in the BAR */
3005         phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
3006
3007         /*
3008          * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
3009          * will be dropped. For WC mappings in general we have 64 byte burst
3010          * writes when the WC buffer is flushed, so we can't use it, but have to
3011          * resort to an uncached mapping. The WC issue is easily caught by the
3012          * readback check when writing GTT PTE entries.
3013          */
3014         if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3015                 ggtt->gsm = ioremap_nocache(phys_addr, size);
3016         else
3017                 ggtt->gsm = ioremap_wc(phys_addr, size);
3018         if (!ggtt->gsm) {
3019                 DRM_ERROR("Failed to map the ggtt page table\n");
3020                 return -ENOMEM;
3021         }
3022
3023         ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
3024         if (ret) {
3025                 DRM_ERROR("Scratch setup failed\n");
3026                 /* iounmap will also get called at remove, but meh */
3027                 iounmap(ggtt->gsm);
3028                 return ret;
3029         }
3030
3031         return 0;
3032 }
3033
3034 static struct intel_ppat_entry *
3035 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3036 {
3037         struct intel_ppat_entry *entry = &ppat->entries[index];
3038
3039         GEM_BUG_ON(index >= ppat->max_entries);
3040         GEM_BUG_ON(test_bit(index, ppat->used));
3041
3042         entry->ppat = ppat;
3043         entry->value = value;
3044         kref_init(&entry->ref);
3045         set_bit(index, ppat->used);
3046         set_bit(index, ppat->dirty);
3047
3048         return entry;
3049 }
3050
3051 static void __free_ppat_entry(struct intel_ppat_entry *entry)
3052 {
3053         struct intel_ppat *ppat = entry->ppat;
3054         unsigned int index = entry - ppat->entries;
3055
3056         GEM_BUG_ON(index >= ppat->max_entries);
3057         GEM_BUG_ON(!test_bit(index, ppat->used));
3058
3059         entry->value = ppat->clear_value;
3060         clear_bit(index, ppat->used);
3061         set_bit(index, ppat->dirty);
3062 }
3063
3064 /**
3065  * intel_ppat_get - get a usable PPAT entry
3066  * @i915: i915 device instance
3067  * @value: the PPAT value required by the caller
3068  *
3069  * The function tries to search if there is an existing PPAT entry which
3070  * matches with the required value. If perfectly matched, the existing PPAT
3071  * entry will be used. If only partially matched, it will try to check if
3072  * there is any available PPAT index. If yes, it will allocate a new PPAT
3073  * index for the required entry and update the HW. If not, the partially
3074  * matched entry will be used.
3075  */
3076 const struct intel_ppat_entry *
3077 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3078 {
3079         struct intel_ppat *ppat = &i915->ppat;
3080         struct intel_ppat_entry *entry = NULL;
3081         unsigned int scanned, best_score;
3082         int i;
3083
3084         GEM_BUG_ON(!ppat->max_entries);
3085
3086         scanned = best_score = 0;
3087         for_each_set_bit(i, ppat->used, ppat->max_entries) {
3088                 unsigned int score;
3089
3090                 score = ppat->match(ppat->entries[i].value, value);
3091                 if (score > best_score) {
3092                         entry = &ppat->entries[i];
3093                         if (score == INTEL_PPAT_PERFECT_MATCH) {
3094                                 kref_get(&entry->ref);
3095                                 return entry;
3096                         }
3097                         best_score = score;
3098                 }
3099                 scanned++;
3100         }
3101
3102         if (scanned == ppat->max_entries) {
3103                 if (!entry)
3104                         return ERR_PTR(-ENOSPC);
3105
3106                 kref_get(&entry->ref);
3107                 return entry;
3108         }
3109
3110         i = find_first_zero_bit(ppat->used, ppat->max_entries);
3111         entry = __alloc_ppat_entry(ppat, i, value);
3112         ppat->update_hw(i915);
3113         return entry;
3114 }
3115
3116 static void release_ppat(struct kref *kref)
3117 {
3118         struct intel_ppat_entry *entry =
3119                 container_of(kref, struct intel_ppat_entry, ref);
3120         struct drm_i915_private *i915 = entry->ppat->i915;
3121
3122         __free_ppat_entry(entry);
3123         entry->ppat->update_hw(i915);
3124 }
3125
3126 /**
3127  * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
3128  * @entry: an intel PPAT entry
3129  *
3130  * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
3131  * entry is dynamically allocated, its reference count will be decreased. Once
3132  * the reference count becomes into zero, the PPAT index becomes free again.
3133  */
3134 void intel_ppat_put(const struct intel_ppat_entry *entry)
3135 {
3136         struct intel_ppat *ppat = entry->ppat;
3137         unsigned int index = entry - ppat->entries;
3138
3139         GEM_BUG_ON(!ppat->max_entries);
3140
3141         kref_put(&ppat->entries[index].ref, release_ppat);
3142 }
3143
3144 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3145 {
3146         struct intel_ppat *ppat = &dev_priv->ppat;
3147         int i;
3148
3149         for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3150                 I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3151                 clear_bit(i, ppat->dirty);
3152         }
3153 }
3154
3155 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3156 {
3157         struct intel_ppat *ppat = &dev_priv->ppat;
3158         u64 pat = 0;
3159         int i;
3160
3161         for (i = 0; i < ppat->max_entries; i++)
3162                 pat |= GEN8_PPAT(i, ppat->entries[i].value);
3163
3164         bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3165
3166         I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3167         I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3168 }
3169
3170 static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3171 {
3172         unsigned int score = 0;
3173         enum {
3174                 AGE_MATCH = BIT(0),
3175                 TC_MATCH = BIT(1),
3176                 CA_MATCH = BIT(2),
3177         };
3178
3179         /* Cache attribute has to be matched. */
3180         if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3181                 return 0;
3182
3183         score |= CA_MATCH;
3184
3185         if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3186                 score |= TC_MATCH;
3187
3188         if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3189                 score |= AGE_MATCH;
3190
3191         if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3192                 return INTEL_PPAT_PERFECT_MATCH;
3193
3194         return score;
3195 }
3196
3197 static unsigned int chv_private_pat_match(u8 src, u8 dst)
3198 {
3199         return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3200                 INTEL_PPAT_PERFECT_MATCH : 0;
3201 }
3202
3203 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3204 {
3205         ppat->max_entries = 8;
3206         ppat->update_hw = cnl_private_pat_update_hw;
3207         ppat->match = bdw_private_pat_match;
3208         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3209
3210         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3211         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3212         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3213         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3214         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3215         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3216         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3217         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3218 }
3219
3220 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3221  * bits. When using advanced contexts each context stores its own PAT, but
3222  * writing this data shouldn't be harmful even in those cases. */
3223 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3224 {
3225         ppat->max_entries = 8;
3226         ppat->update_hw = bdw_private_pat_update_hw;
3227         ppat->match = bdw_private_pat_match;
3228         ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3229
3230         if (!USES_PPGTT(ppat->i915)) {
3231                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3232                  * so RTL will always use the value corresponding to
3233                  * pat_sel = 000".
3234                  * So let's disable cache for GGTT to avoid screen corruptions.
3235                  * MOCS still can be used though.
3236                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3237                  * before this patch, i.e. the same uncached + snooping access
3238                  * like on gen6/7 seems to be in effect.
3239                  * - So this just fixes blitter/render access. Again it looks
3240                  * like it's not just uncached access, but uncached + snooping.
3241                  * So we can still hold onto all our assumptions wrt cpu
3242                  * clflushing on LLC machines.
3243                  */
3244                 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3245                 return;
3246         }
3247
3248         __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3249         __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3250         __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3251         __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3252         __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3253         __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3254         __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3255         __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3256 }
3257
3258 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3259 {
3260         ppat->max_entries = 8;
3261         ppat->update_hw = bdw_private_pat_update_hw;
3262         ppat->match = chv_private_pat_match;
3263         ppat->clear_value = CHV_PPAT_SNOOP;
3264
3265         /*
3266          * Map WB on BDW to snooped on CHV.
3267          *
3268          * Only the snoop bit has meaning for CHV, the rest is
3269          * ignored.
3270          *
3271          * The hardware will never snoop for certain types of accesses:
3272          * - CPU GTT (GMADR->GGTT->no snoop->memory)
3273          * - PPGTT page tables
3274          * - some other special cycles
3275          *
3276          * As with BDW, we also need to consider the following for GT accesses:
3277          * "For GGTT, there is NO pat_sel[2:0] from the entry,
3278          * so RTL will always use the value corresponding to
3279          * pat_sel = 000".
3280          * Which means we must set the snoop bit in PAT entry 0
3281          * in order to keep the global status page working.
3282          */
3283
3284         __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3285         __alloc_ppat_entry(ppat, 1, 0);
3286         __alloc_ppat_entry(ppat, 2, 0);
3287         __alloc_ppat_entry(ppat, 3, 0);
3288         __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3289         __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3290         __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3291         __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3292 }
3293
3294 static void gen6_gmch_remove(struct i915_address_space *vm)
3295 {
3296         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3297
3298         iounmap(ggtt->gsm);
3299         cleanup_scratch_page(vm);
3300 }
3301
3302 static void setup_private_pat(struct drm_i915_private *dev_priv)
3303 {
3304         struct intel_ppat *ppat = &dev_priv->ppat;
3305         int i;
3306
3307         ppat->i915 = dev_priv;
3308
3309         if (INTEL_GEN(dev_priv) >= 10)
3310                 cnl_setup_private_ppat(ppat);
3311         else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3312                 chv_setup_private_ppat(ppat);
3313         else
3314                 bdw_setup_private_ppat(ppat);
3315
3316         GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3317
3318         for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3319                 ppat->entries[i].value = ppat->clear_value;
3320                 ppat->entries[i].ppat = ppat;
3321                 set_bit(i, ppat->dirty);
3322         }
3323
3324         ppat->update_hw(dev_priv);
3325 }
3326
3327 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3328 {
3329         struct drm_i915_private *dev_priv = ggtt->base.i915;
3330         struct pci_dev *pdev = dev_priv->drm.pdev;
3331         unsigned int size;
3332         u16 snb_gmch_ctl;
3333         int err;
3334
3335         /* TODO: We're not aware of mappable constraints on gen8 yet */
3336         ggtt->gmadr =
3337                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3338                                                  pci_resource_len(pdev, 2));
3339         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3340
3341         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3342         if (!err)
3343                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3344         if (err)
3345                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3346
3347         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3348         if (IS_CHERRYVIEW(dev_priv))
3349                 size = chv_get_total_gtt_size(snb_gmch_ctl);
3350         else
3351                 size = gen8_get_total_gtt_size(snb_gmch_ctl);
3352
3353         ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3354         ggtt->base.cleanup = gen6_gmch_remove;
3355         ggtt->base.bind_vma = ggtt_bind_vma;
3356         ggtt->base.unbind_vma = ggtt_unbind_vma;
3357         ggtt->base.set_pages = ggtt_set_pages;
3358         ggtt->base.clear_pages = clear_pages;
3359         ggtt->base.insert_page = gen8_ggtt_insert_page;
3360         ggtt->base.clear_range = nop_clear_range;
3361         if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3362                 ggtt->base.clear_range = gen8_ggtt_clear_range;
3363
3364         ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3365
3366         /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3367         if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3368                 ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3369                 ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3370                 if (ggtt->base.clear_range != nop_clear_range)
3371                         ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3372         }
3373
3374         ggtt->invalidate = gen6_ggtt_invalidate;
3375
3376         setup_private_pat(dev_priv);
3377
3378         return ggtt_probe_common(ggtt, size);
3379 }
3380
3381 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3382 {
3383         struct drm_i915_private *dev_priv = ggtt->base.i915;
3384         struct pci_dev *pdev = dev_priv->drm.pdev;
3385         unsigned int size;
3386         u16 snb_gmch_ctl;
3387         int err;
3388
3389         ggtt->gmadr =
3390                 (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3391                                                  pci_resource_len(pdev, 2));
3392         ggtt->mappable_end = resource_size(&ggtt->gmadr);
3393
3394         /* 64/512MB is the current min/max we actually know of, but this is just
3395          * a coarse sanity check.
3396          */
3397         if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3398                 DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3399                 return -ENXIO;
3400         }
3401
3402         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3403         if (!err)
3404                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3405         if (err)
3406                 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3407         pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3408
3409         size = gen6_get_total_gtt_size(snb_gmch_ctl);
3410         ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3411
3412         ggtt->base.clear_range = gen6_ggtt_clear_range;
3413         ggtt->base.insert_page = gen6_ggtt_insert_page;
3414         ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3415         ggtt->base.bind_vma = ggtt_bind_vma;
3416         ggtt->base.unbind_vma = ggtt_unbind_vma;
3417         ggtt->base.set_pages = ggtt_set_pages;
3418         ggtt->base.clear_pages = clear_pages;
3419         ggtt->base.cleanup = gen6_gmch_remove;
3420
3421         ggtt->invalidate = gen6_ggtt_invalidate;
3422
3423         if (HAS_EDRAM(dev_priv))
3424                 ggtt->base.pte_encode = iris_pte_encode;
3425         else if (IS_HASWELL(dev_priv))
3426                 ggtt->base.pte_encode = hsw_pte_encode;
3427         else if (IS_VALLEYVIEW(dev_priv))
3428                 ggtt->base.pte_encode = byt_pte_encode;
3429         else if (INTEL_GEN(dev_priv) >= 7)
3430                 ggtt->base.pte_encode = ivb_pte_encode;
3431         else
3432                 ggtt->base.pte_encode = snb_pte_encode;
3433
3434         return ggtt_probe_common(ggtt, size);
3435 }
3436
/*
 * Address-space cleanup hook installed by i915_gmch_probe(). It only
 * forwards to intel_gmch_remove(); @vm itself is not used.
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
        intel_gmch_remove();
}
3441
3442 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3443 {
3444         struct drm_i915_private *dev_priv = ggtt->base.i915;
3445         phys_addr_t gmadr_base;
3446         int ret;
3447
3448         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3449         if (!ret) {
3450                 DRM_ERROR("failed to set up gmch\n");
3451                 return -EIO;
3452         }
3453
3454         intel_gtt_get(&ggtt->base.total,
3455                       &gmadr_base,
3456                       &ggtt->mappable_end);
3457
3458         ggtt->gmadr =
3459                 (struct resource) DEFINE_RES_MEM(gmadr_base,
3460                                                  ggtt->mappable_end);
3461
3462         ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3463         ggtt->base.insert_page = i915_ggtt_insert_page;
3464         ggtt->base.insert_entries = i915_ggtt_insert_entries;
3465         ggtt->base.clear_range = i915_ggtt_clear_range;
3466         ggtt->base.bind_vma = ggtt_bind_vma;
3467         ggtt->base.unbind_vma = ggtt_unbind_vma;
3468         ggtt->base.set_pages = ggtt_set_pages;
3469         ggtt->base.clear_pages = clear_pages;
3470         ggtt->base.cleanup = i915_gmch_remove;
3471
3472         ggtt->invalidate = gmch_ggtt_invalidate;
3473
3474         if (unlikely(ggtt->do_idle_maps))
3475                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3476
3477         return 0;
3478 }
3479
3480 /**
3481  * i915_ggtt_probe_hw - Probe GGTT hardware location
3482  * @dev_priv: i915 device
3483  */
3484 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3485 {
3486         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3487         int ret;
3488
3489         ggtt->base.i915 = dev_priv;
3490         ggtt->base.dma = &dev_priv->drm.pdev->dev;
3491
3492         if (INTEL_GEN(dev_priv) <= 5)
3493                 ret = i915_gmch_probe(ggtt);
3494         else if (INTEL_GEN(dev_priv) < 8)
3495                 ret = gen6_gmch_probe(ggtt);
3496         else
3497                 ret = gen8_gmch_probe(ggtt);
3498         if (ret)
3499                 return ret;
3500
3501         /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3502          * This is easier than doing range restriction on the fly, as we
3503          * currently don't have any bits spare to pass in this upper
3504          * restriction!
3505          */
3506         if (USES_GUC(dev_priv)) {
3507                 ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
3508                 ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total);
3509         }
3510
3511         if ((ggtt->base.total - 1) >> 32) {
3512                 DRM_ERROR("We never expected a Global GTT with more than 32bits"
3513                           " of address space! Found %lldM!\n",
3514                           ggtt->base.total >> 20);
3515                 ggtt->base.total = 1ULL << 32;
3516                 ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total);
3517         }
3518
3519         if (ggtt->mappable_end > ggtt->base.total) {
3520                 DRM_ERROR("mappable aperture extends past end of GGTT,"
3521                           " aperture=%pa, total=%llx\n",
3522                           &ggtt->mappable_end, ggtt->base.total);
3523                 ggtt->mappable_end = ggtt->base.total;
3524         }
3525
3526         /* GMADR is the PCI mmio aperture into the global GTT. */
3527         DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20);
3528         DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3529         DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3530                          (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3531         if (intel_vtd_active())
3532                 DRM_INFO("VT-d active for gfx access\n");
3533
3534         return 0;
3535 }
3536
3537 /**
3538  * i915_ggtt_init_hw - Initialize GGTT hardware
3539  * @dev_priv: i915 device
3540  */
3541 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3542 {
3543         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3544         int ret;
3545
3546         INIT_LIST_HEAD(&dev_priv->vm_list);
3547
3548         /* Note that we use page colouring to enforce a guard page at the
3549          * end of the address space. This is required as the CS may prefetch
3550          * beyond the end of the batch buffer, across the page boundary,
3551          * and beyond the end of the GTT if we do not provide a guard.
3552          */
3553         mutex_lock(&dev_priv->drm.struct_mutex);
3554         i915_address_space_init(&ggtt->base, dev_priv, "[global]");
3555         if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3556                 ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3557         mutex_unlock(&dev_priv->drm.struct_mutex);
3558
3559         if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3560                                 dev_priv->ggtt.gmadr.start,
3561                                 dev_priv->ggtt.mappable_end)) {
3562                 ret = -EIO;
3563                 goto out_gtt_cleanup;
3564         }
3565
3566         ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3567
3568         /*
3569          * Initialise stolen early so that we may reserve preallocated
3570          * objects for the BIOS to KMS transition.
3571          */
3572         ret = i915_gem_init_stolen(dev_priv);
3573         if (ret)
3574                 goto out_gtt_cleanup;
3575
3576         return 0;
3577
3578 out_gtt_cleanup:
3579         ggtt->base.cleanup(&ggtt->base);
3580         return ret;
3581 }
3582
3583 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3584 {
3585         if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3586                 return -EIO;
3587
3588         return 0;
3589 }
3590
3591 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3592 {
3593         GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3594
3595         i915->ggtt.invalidate = guc_ggtt_invalidate;
3596
3597         i915_ggtt_invalidate(i915);
3598 }
3599
3600 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3601 {
3602         /* We should only be called after i915_ggtt_enable_guc() */
3603         GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3604
3605         i915->ggtt.invalidate = gen6_ggtt_invalidate;
3606
3607         i915_ggtt_invalidate(i915);
3608 }
3609
3610 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3611 {
3612         struct i915_ggtt *ggtt = &dev_priv->ggtt;
3613         struct drm_i915_gem_object *obj, *on;
3614
3615         i915_check_and_clear_faults(dev_priv);
3616
3617         /* First fill our portion of the GTT with scratch pages */
3618         ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
3619
3620         ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3621
3622         /* clflush objects bound into the GGTT and rebind them. */
3623         list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) {
3624                 bool ggtt_bound = false;
3625                 struct i915_vma *vma;
3626
3627                 for_each_ggtt_vma(vma, obj) {
3628                         if (!i915_vma_unbind(vma))
3629                                 continue;
3630
3631                         WARN_ON(i915_vma_bind(vma, obj->cache_level,
3632                                               PIN_UPDATE));
3633                         ggtt_bound = true;
3634                 }
3635
3636                 if (ggtt_bound)
3637                         WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3638         }
3639
3640         ggtt->base.closed = false;
3641
3642         if (INTEL_GEN(dev_priv) >= 8) {
3643                 struct intel_ppat *ppat = &dev_priv->ppat;
3644
3645                 bitmap_set(ppat->dirty, 0, ppat->max_entries);
3646                 dev_priv->ppat.update_hw(dev_priv);
3647                 return;
3648         }
3649
3650         if (USES_PPGTT(dev_priv)) {
3651                 struct i915_address_space *vm;
3652
3653                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3654                         struct i915_hw_ppgtt *ppgtt;
3655
3656                         if (i915_is_ggtt(vm))
3657                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
3658                         else
3659                                 ppgtt = i915_vm_to_ppgtt(vm);
3660
3661                         gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
3662                 }
3663         }
3664
3665         i915_ggtt_invalidate(dev_priv);
3666 }
3667
3668 static struct scatterlist *
3669 rotate_pages(const dma_addr_t *in, unsigned int offset,
3670              unsigned int width, unsigned int height,
3671              unsigned int stride,
3672              struct sg_table *st, struct scatterlist *sg)
3673 {
3674         unsigned int column, row;
3675         unsigned int src_idx;
3676
3677         for (column = 0; column < width; column++) {
3678                 src_idx = stride * (height - 1) + column;
3679                 for (row = 0; row < height; row++) {
3680                         st->nents++;
3681                         /* We don't need the pages, but need to initialize
3682                          * the entries so the sg list can be happily traversed.
3683                          * The only thing we need are DMA addresses.
3684                          */
3685                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
3686                         sg_dma_address(sg) = in[offset + src_idx];
3687                         sg_dma_len(sg) = PAGE_SIZE;
3688                         sg = sg_next(sg);
3689                         src_idx -= stride;
3690                 }
3691         }
3692
3693         return sg;
3694 }
3695
3696 static noinline struct sg_table *
3697 intel_rotate_pages(struct intel_rotation_info *rot_info,
3698                    struct drm_i915_gem_object *obj)
3699 {
3700         const unsigned long n_pages = obj->base.size / PAGE_SIZE;
3701         unsigned int size = intel_rotation_info_size(rot_info);
3702         struct sgt_iter sgt_iter;
3703         dma_addr_t dma_addr;
3704         unsigned long i;
3705         dma_addr_t *page_addr_list;
3706         struct sg_table *st;
3707         struct scatterlist *sg;
3708         int ret = -ENOMEM;
3709
3710         /* Allocate a temporary list of source pages for random access. */
3711         page_addr_list = kvmalloc_array(n_pages,
3712                                         sizeof(dma_addr_t),
3713                                         GFP_KERNEL);
3714         if (!page_addr_list)
3715                 return ERR_PTR(ret);
3716
3717         /* Allocate target SG list. */
3718         st = kmalloc(sizeof(*st), GFP_KERNEL);
3719         if (!st)
3720                 goto err_st_alloc;
3721
3722         ret = sg_alloc_table(st, size, GFP_KERNEL);
3723         if (ret)
3724                 goto err_sg_alloc;
3725
3726         /* Populate source page list from the object. */
3727         i = 0;
3728         for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
3729                 page_addr_list[i++] = dma_addr;
3730
3731         GEM_BUG_ON(i != n_pages);
3732         st->nents = 0;
3733         sg = st->sgl;
3734
3735         for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3736                 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3737                                   rot_info->plane[i].width, rot_info->plane[i].height,
3738                                   rot_info->plane[i].stride, st, sg);
3739         }
3740
3741         kvfree(page_addr_list);
3742
3743         return st;
3744
3745 err_sg_alloc:
3746         kfree(st);
3747 err_st_alloc:
3748         kvfree(page_addr_list);
3749
3750         DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3751                          obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3752
3753         return ERR_PTR(ret);
3754 }
3755
3756 static noinline struct sg_table *
3757 intel_partial_pages(const struct i915_ggtt_view *view,
3758                     struct drm_i915_gem_object *obj)
3759 {
3760         struct sg_table *st;
3761         struct scatterlist *sg, *iter;
3762         unsigned int count = view->partial.size;
3763         unsigned int offset;
3764         int ret = -ENOMEM;
3765
3766         st = kmalloc(sizeof(*st), GFP_KERNEL);
3767         if (!st)
3768                 goto err_st_alloc;
3769
3770         ret = sg_alloc_table(st, count, GFP_KERNEL);
3771         if (ret)
3772                 goto err_sg_alloc;
3773
3774         iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3775         GEM_BUG_ON(!iter);
3776
3777         sg = st->sgl;
3778         st->nents = 0;
3779         do {
3780                 unsigned int len;
3781
3782                 len = min(iter->length - (offset << PAGE_SHIFT),
3783                           count << PAGE_SHIFT);
3784                 sg_set_page(sg, NULL, len, 0);
3785                 sg_dma_address(sg) =
3786                         sg_dma_address(iter) + (offset << PAGE_SHIFT);
3787                 sg_dma_len(sg) = len;
3788
3789                 st->nents++;
3790                 count -= len >> PAGE_SHIFT;
3791                 if (count == 0) {
3792                         sg_mark_end(sg);
3793                         return st;
3794                 }
3795
3796                 sg = __sg_next(sg);
3797                 iter = __sg_next(iter);
3798                 offset = 0;
3799         } while (1);
3800
3801 err_sg_alloc:
3802         kfree(st);
3803 err_st_alloc:
3804         return ERR_PTR(ret);
3805 }
3806
3807 static int
3808 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3809 {
3810         int ret;
3811
3812         /* The vma->pages are only valid within the lifespan of the borrowed
3813          * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
3814          * must be the vma->pages. A simple rule is that vma->pages must only
3815          * be accessed when the obj->mm.pages are pinned.
3816          */
3817         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3818
3819         switch (vma->ggtt_view.type) {
3820         default:
3821                 GEM_BUG_ON(vma->ggtt_view.type);
3822                 /* fall through */
3823         case I915_GGTT_VIEW_NORMAL:
3824                 vma->pages = vma->obj->mm.pages;
3825                 return 0;
3826
3827         case I915_GGTT_VIEW_ROTATED:
3828                 vma->pages =
3829                         intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3830                 break;
3831
3832         case I915_GGTT_VIEW_PARTIAL:
3833                 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3834                 break;
3835         }
3836
3837         ret = 0;
3838         if (unlikely(IS_ERR(vma->pages))) {
3839                 ret = PTR_ERR(vma->pages);
3840                 vma->pages = NULL;
3841                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3842                           vma->ggtt_view.type, ret);
3843         }
3844         return ret;
3845 }
3846
3847 /**
3848  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3849  * @vm: the &struct i915_address_space
3850  * @node: the &struct drm_mm_node (typically i915_vma.mode)
3851  * @size: how much space to allocate inside the GTT,
3852  *        must be #I915_GTT_PAGE_SIZE aligned
3853  * @offset: where to insert inside the GTT,
3854  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3855  *          (@offset + @size) must fit within the address space
3856  * @color: color to apply to node, if this node is not from a VMA,
3857  *         color must be #I915_COLOR_UNEVICTABLE
3858  * @flags: control search and eviction behaviour
3859  *
3860  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3861  * the address space (using @size and @color). If the @node does not fit, it
3862  * tries to evict any overlapping nodes from the GTT, including any
3863  * neighbouring nodes if the colors do not match (to ensure guard pages between
3864  * differing domains). See i915_gem_evict_for_node() for the gory details
3865  * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
3866  * evicting active overlapping objects, and any overlapping node that is pinned
3867  * or marked as unevictable will also result in failure.
3868  *
3869  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3870  * asked to wait for eviction and interrupted.
3871  */
3872 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3873                          struct drm_mm_node *node,
3874                          u64 size, u64 offset, unsigned long color,
3875                          unsigned int flags)
3876 {
3877         int err;
3878
3879         GEM_BUG_ON(!size);
3880         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3881         GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3882         GEM_BUG_ON(range_overflows(offset, size, vm->total));
3883         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3884         GEM_BUG_ON(drm_mm_node_allocated(node));
3885
3886         node->size = size;
3887         node->start = offset;
3888         node->color = color;
3889
3890         err = drm_mm_reserve_node(&vm->mm, node);
3891         if (err != -ENOSPC)
3892                 return err;
3893
3894         if (flags & PIN_NOEVICT)
3895                 return -ENOSPC;
3896
3897         err = i915_gem_evict_for_node(vm, node, flags);
3898         if (err == 0)
3899                 err = drm_mm_reserve_node(&vm->mm, node);
3900
3901         return err;
3902 }
3903
3904 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3905 {
3906         u64 range, addr;
3907
3908         GEM_BUG_ON(range_overflows(start, len, end));
3909         GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3910
3911         range = round_down(end - len, align) - round_up(start, align);
3912         if (range) {
3913                 if (sizeof(unsigned long) == sizeof(u64)) {
3914                         addr = get_random_long();
3915                 } else {
3916                         addr = get_random_int();
3917                         if (range > U32_MAX) {
3918                                 addr <<= 32;
3919                                 addr |= get_random_int();
3920                         }
3921                 }
3922                 div64_u64_rem(addr, range, &addr);
3923                 start += addr;
3924         }
3925
3926         return round_up(start, align);
3927 }
3928
3929 /**
3930  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3931  * @vm: the &struct i915_address_space
3932  * @node: the &struct drm_mm_node (typically i915_vma.node)
3933  * @size: how much space to allocate inside the GTT,
3934  *        must be #I915_GTT_PAGE_SIZE aligned
3935  * @alignment: required alignment of starting offset, may be 0 but
3936  *             if specified, this must be a power-of-two and at least
3937  *             #I915_GTT_MIN_ALIGNMENT
3938  * @color: color to apply to node
3939  * @start: start of any range restriction inside GTT (0 for all),
3940  *         must be #I915_GTT_PAGE_SIZE aligned
3941  * @end: end of any range restriction inside GTT (U64_MAX for all),
3942  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3943  * @flags: control search and eviction behaviour
3944  *
3945  * i915_gem_gtt_insert() first searches for an available hole into which
3946  * is can insert the node. The hole address is aligned to @alignment and
3947  * its @size must then fit entirely within the [@start, @end] bounds. The
3948  * nodes on either side of the hole must match @color, or else a guard page
3949  * will be inserted between the two nodes (or the node evicted). If no
3950  * suitable hole is found, first a victim is randomly selected and tested
3951  * for eviction, otherwise then the LRU list of objects within the GTT
3952  * is scanned to find the first set of replacement nodes to create the hole.
3953  * Those old overlapping nodes are evicted from the GTT (and so must be
3954  * rebound before any future use). Any node that is currently pinned cannot
3955  * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
3956  * active and #PIN_NONBLOCK is specified, that node is also skipped when
3957  * searching for an eviction candidate. See i915_gem_evict_something() for
3958  * the gory details on the eviction algorithm.
3959  *
3960  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3961  * asked to wait for eviction and interrupted.
3962  */
3963 int i915_gem_gtt_insert(struct i915_address_space *vm,
3964                         struct drm_mm_node *node,
3965                         u64 size, u64 alignment, unsigned long color,
3966                         u64 start, u64 end, unsigned int flags)
3967 {
3968         enum drm_mm_insert_mode mode;
3969         u64 offset;
3970         int err;
3971
3972         lockdep_assert_held(&vm->i915->drm.struct_mutex);
3973         GEM_BUG_ON(!size);
3974         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3975         GEM_BUG_ON(alignment && !is_power_of_2(alignment));
3976         GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
3977         GEM_BUG_ON(start >= end);
3978         GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
3979         GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
3980         GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3981         GEM_BUG_ON(drm_mm_node_allocated(node));
3982
3983         if (unlikely(range_overflows(start, size, end)))
3984                 return -ENOSPC;
3985
3986         if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
3987                 return -ENOSPC;
3988
3989         mode = DRM_MM_INSERT_BEST;
3990         if (flags & PIN_HIGH)
3991                 mode = DRM_MM_INSERT_HIGH;
3992         if (flags & PIN_MAPPABLE)
3993                 mode = DRM_MM_INSERT_LOW;
3994
3995         /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3996          * so we know that we always have a minimum alignment of 4096.
3997          * The drm_mm range manager is optimised to return results
3998          * with zero alignment, so where possible use the optimal
3999          * path.
4000          */
4001         BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
4002         if (alignment <= I915_GTT_MIN_ALIGNMENT)
4003                 alignment = 0;
4004
4005         err = drm_mm_insert_node_in_range(&vm->mm, node,
4006                                           size, alignment, color,
4007                                           start, end, mode);
4008         if (err != -ENOSPC)
4009                 return err;
4010
4011         if (flags & PIN_NOEVICT)
4012                 return -ENOSPC;
4013
4014         /* No free space, pick a slot at random.
4015          *
4016          * There is a pathological case here using a GTT shared between
4017          * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
4018          *
4019          *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
4020          *         (64k objects)             (448k objects)
4021          *
4022          * Now imagine that the eviction LRU is ordered top-down (just because
4023          * pathology meets real life), and that we need to evict an object to
4024          * make room inside the aperture. The eviction scan then has to walk
4025          * the 448k list before it finds one within range. And now imagine that
4026          * it has to search for a new hole between every byte inside the memcpy,
4027          * for several simultaneous clients.
4028          *
4029          * On a full-ppgtt system, if we have run out of available space, there
4030          * will be lots and lots of objects in the eviction list! Again,
4031          * searching that LRU list may be slow if we are also applying any
4032          * range restrictions (e.g. restriction to low 4GiB) and so, for
4033          * simplicity and similarilty between different GTT, try the single
4034          * random replacement first.
4035          */
4036         offset = random_offset(start, end,
4037                                size, alignment ?: I915_GTT_MIN_ALIGNMENT);
4038         err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
4039         if (err != -ENOSPC)
4040                 return err;
4041
4042         /* Randomly selected placement is pinned, do a search */
4043         err = i915_gem_evict_something(vm, size, alignment, color,
4044                                        start, end, flags);
4045         if (err)
4046                 return err;
4047
4048         return drm_mm_insert_node_in_range(&vm->mm, node,
4049                                            size, alignment, color,
4050                                            start, end, DRM_MM_INSERT_EVICT);
4051 }
4052
4053 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4054 #include "selftests/mock_gtt.c"
4055 #include "selftests/i915_gem_gtt.c"
4056 #endif