drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

   1 /*
   2  * Copyright 2008 Advanced Micro Devices, Inc.
   3  * Copyright 2008 Red Hat Inc.
   4  * Copyright 2009 Jerome Glisse.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors: Dave Airlie
  25  *          Alex Deucher
  26  *          Jerome Glisse
  27  */
  28 #include <linux/dma-fence-array.h>
  29 #include <linux/interval_tree_generic.h>
  30 #include <linux/idr.h>
  31 #include <drm/drmP.h>
  32 #include <drm/amdgpu_drm.h>
  33 #include "amdgpu.h"
  34 #include "amdgpu_trace.h"
  35 #include "amdgpu_amdkfd.h"
  36
  37 /*
  38  * GPUVM
  39  * GPUVM is similar to the legacy gart on older asics, however
  40  * rather than there being a single global gart table
  41  * for the entire GPU, there are multiple VM page tables active
  42  * at any given time.  The VM page tables can contain a mix
  43  * vram pages and system memory pages and system memory pages
  44  * can be mapped as snooped (cached system pages) or unsnooped
  45  * (uncached system pages).
  46  * Each VM has an ID associated with it and there is a page table
  47  * associated with each VMID.  When execting a command buffer,
  48  * the kernel tells the the ring what VMID to use for that command
  49  * buffer.  VMIDs are allocated dynamically as commands are submitted.
  50  * The userspace drivers maintain their own address space and the kernel
  51  * sets up their pages tables accordingly when they submit their
  52  * command buffers and a VMID is assigned.
  53  * Cayman/Trinity support up to 8 active VMs at any given time;
  54  * SI supports 16.
  55  */
  56
  57 #define START(node) ((node)->start)
  58 #define LAST(node) ((node)->last)
  59
  60 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
  61                      START, LAST, static, amdgpu_vm_it)
  62
  63 #undef START
  64 #undef LAST
  65
  66 /* Local structure. Encapsulate some VM table update parameters to reduce
  67  * the number of function parameters
  68  */
  69 struct amdgpu_pte_update_params {
  70         /* amdgpu device we do this update for */
  71         struct amdgpu_device *adev;
  72         /* optional amdgpu_vm we do this update for */
  73         struct amdgpu_vm *vm;
  74         /* address where to copy page table entries from */
  75         uint64_t src;
  76         /* indirect buffer to fill with commands */
  77         struct amdgpu_ib *ib;
  78         /* Function which actually does the update */
  79         void (*func)(struct amdgpu_pte_update_params *params,
  80                      struct amdgpu_bo *bo, uint64_t pe,
  81                      uint64_t addr, unsigned count, uint32_t incr,
  82                      uint64_t flags);
  83         /* The next two are used during VM update by CPU
  84          *  DMA addresses to use for mapping
  85          *  Kernel pointer of PD/PT BO that needs to be updated
  86          */
  87         dma_addr_t *pages_addr;
  88         void *kptr;
  89 };
  90
  91 /* Helper to disable partial resident texture feature from a fence callback */
  92 struct amdgpu_prt_cb {
  93         struct amdgpu_device *adev;
  94         struct dma_fence_cb cb;
  95 };
  96
  97 static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
  98                                    struct amdgpu_vm *vm,
  99                                    struct amdgpu_bo *bo)
 100 {
 101         base->vm = vm;
 102         base->bo = bo;
 103         INIT_LIST_HEAD(&base->bo_list);
 104         INIT_LIST_HEAD(&base->vm_status);
 105
 106         if (!bo)
 107                 return;
 108         list_add_tail(&base->bo_list, &bo->va);
 109
 110         if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
 111                 return;
 112
 113         if (bo->preferred_domains &
 114             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
 115                 return;
 116
 117         /*
 118          * we checked all the prerequisites, but it looks like this per vm bo
 119          * is currently evicted. add the bo to the evicted list to make sure it
 120          * is validated on next vm use to avoid fault.
 121          * */
 122         list_move_tail(&base->vm_status, &vm->evicted);
 123 }
 124
 125 /**
 126  * amdgpu_vm_level_shift - return the addr shift for each level
 127  *
 128  * @adev: amdgpu_device pointer
 129  *
 130  * Returns the number of bits the pfn needs to be right shifted for a level.
 131  */
 132 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 133                                       unsigned level)
 134 {
 135         unsigned shift = 0xff;
 136
 137         switch (level) {
 138         case AMDGPU_VM_PDB2:
 139         case AMDGPU_VM_PDB1:
 140         case AMDGPU_VM_PDB0:
 141                 shift = 9 * (AMDGPU_VM_PDB0 - level) +
 142                         adev->vm_manager.block_size;
 143                 break;
 144         case AMDGPU_VM_PTB:
 145                 shift = 0;
 146                 break;
 147         default:
 148                 dev_err(adev->dev, "the level%d isn't supported.\n", level);
 149         }
 150
 151         return shift;
 152 }
 153
 154 /**
 155  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 156  *
 157  * @adev: amdgpu_device pointer
 158  *
 159  * Calculate the number of entries in a page directory or page table.
 160  */
 161 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 162                                       unsigned level)
 163 {
 164         unsigned shift = amdgpu_vm_level_shift(adev,
 165                                                adev->vm_manager.root_level);
 166
 167         if (level == adev->vm_manager.root_level)
 168                 /* For the root directory */
 169                 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
 170         else if (level != AMDGPU_VM_PTB)
 171                 /* Everything in between */
 172                 return 512;
 173         else
 174                 /* For the page tables on the leaves */
 175                 return AMDGPU_VM_PTE_COUNT(adev);
 176 }
 177
 178 /**
 179  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 180  *
 181  * @adev: amdgpu_device pointer
 182  *
 183  * Calculate the size of the BO for a page directory or page table in bytes.
 184  */
 185 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 186 {
 187         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 188 }
 189
 190 /**
 191  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 192  *
 193  * @vm: vm providing the BOs
 194  * @validated: head of validation list
 195  * @entry: entry to add
 196  *
 197  * Add the page directory to the list of BOs to
 198  * validate for command submission.
 199  */
 200 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 201                          struct list_head *validated,
 202                          struct amdgpu_bo_list_entry *entry)
 203 {
 204         entry->robj = vm->root.base.bo;
 205         entry->priority = 0;
 206         entry->tv.bo = &entry->robj->tbo;
 207         entry->tv.shared = true;
 208         entry->user_pages = NULL;
 209         list_add(&entry->tv.head, validated);
 210 }
 211
 212 /**
 213  * amdgpu_vm_validate_pt_bos - validate the page table BOs
 214  *
 215  * @adev: amdgpu device pointer
 216  * @vm: vm providing the BOs
 217  * @validate: callback to do the validation
 218  * @param: parameter for the validation callback
 219  *
 220  * Validate the page table BOs on command submission if neccessary.
 221  */
 222 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 223                               int (*validate)(void *p, struct amdgpu_bo *bo),
 224                               void *param)
 225 {
 226         struct ttm_bo_global *glob = adev->mman.bdev.glob;
 227         int r;
 228
 229         while (!list_empty(&vm->evicted)) {
 230                 struct amdgpu_vm_bo_base *bo_base;
 231                 struct amdgpu_bo *bo;
 232
 233                 bo_base = list_first_entry(&vm->evicted,
 234                                            struct amdgpu_vm_bo_base,
 235                                            vm_status);
 236
 237                 bo = bo_base->bo;
 238                 if (bo->parent) {
 239                         r = validate(param, bo);
 240                         if (r)
 241                                 return r;
 242
 243                         spin_lock(&glob->lru_lock);
 244                         ttm_bo_move_to_lru_tail(&bo->tbo);
 245                         if (bo->shadow)
 246                                 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
 247                         spin_unlock(&glob->lru_lock);
 248                 }
 249
 250                 if (bo->tbo.type == ttm_bo_type_kernel &&
 251                     vm->use_cpu_for_update) {
 252                         r = amdgpu_bo_kmap(bo, NULL);
 253                         if (r)
 254                                 return r;
 255                 }
 256
 257                 if (bo->tbo.type != ttm_bo_type_kernel) {
 258                         spin_lock(&vm->moved_lock);
 259                         list_move(&bo_base->vm_status, &vm->moved);
 260                         spin_unlock(&vm->moved_lock);
 261                 } else {
 262                         list_move(&bo_base->vm_status, &vm->relocated);
 263                 }
 264         }
 265
 266         return 0;
 267 }
 268
 269 /**
 270  * amdgpu_vm_ready - check VM is ready for updates
 271  *
 272  * @vm: VM to check
 273  *
 274  * Check if all VM PDs/PTs are ready for updates
 275  */
 276 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 277 {
 278         return list_empty(&vm->evicted);
 279 }
 280
 281 /**
 282  * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 283  *
 284  * @adev: amdgpu_device pointer
 285  * @bo: BO to clear
 286  * @level: level this BO is at
 287  *
 288  * Root PD needs to be reserved when calling this.
 289  */
 290 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 291                               struct amdgpu_vm *vm, struct amdgpu_bo *bo,
 292                               unsigned level, bool pte_support_ats)
 293 {
 294         struct ttm_operation_ctx ctx = { true, false };
 295         struct dma_fence *fence = NULL;
 296         unsigned entries, ats_entries;
 297         struct amdgpu_ring *ring;
 298         struct amdgpu_job *job;
 299         uint64_t addr;
 300         int r;
 301
 302         addr = amdgpu_bo_gpu_offset(bo);
 303         entries = amdgpu_bo_size(bo) / 8;
 304
 305         if (pte_support_ats) {
 306                 if (level == adev->vm_manager.root_level) {
 307                         ats_entries = amdgpu_vm_level_shift(adev, level);
 308                         ats_entries += AMDGPU_GPU_PAGE_SHIFT;
 309                         ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
 310                         ats_entries = min(ats_entries, entries);
 311                         entries -= ats_entries;
 312                 } else {
 313                         ats_entries = entries;
 314                         entries = 0;
 315                 }
 316         } else {
 317                 ats_entries = 0;
 318         }
 319
 320         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 321
 322         r = reservation_object_reserve_shared(bo->tbo.resv);
 323         if (r)
 324                 return r;
 325
 326         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 327         if (r)
 328                 goto error;
 329
 330         r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 331         if (r)
 332                 goto error;
 333
 334         if (ats_entries) {
 335                 uint64_t ats_value;
 336
 337                 ats_value = AMDGPU_PTE_DEFAULT_ATC;
 338                 if (level != AMDGPU_VM_PTB)
 339                         ats_value |= AMDGPU_PDE_PTE;
 340
 341                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 342                                       ats_entries, 0, ats_value);
 343                 addr += ats_entries * 8;
 344         }
 345
 346         if (entries)
 347                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 348                                       entries, 0, 0);
 349
 350         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 351
 352         WARN_ON(job->ibs[0].length_dw > 64);
 353         r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
 354                              AMDGPU_FENCE_OWNER_UNDEFINED, false);
 355         if (r)
 356                 goto error_free;
 357
 358         r = amdgpu_job_submit(job, ring, &vm->entity,
 359                               AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 360         if (r)
 361                 goto error_free;
 362
 363         amdgpu_bo_fence(bo, fence, true);
 364         dma_fence_put(fence);
 365
 366         if (bo->shadow)
 367                 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
 368                                           level, pte_support_ats);
 369
 370         return 0;
 371
 372 error_free:
 373         amdgpu_job_free(job);
 374
 375 error:
 376         return r;
 377 }
 378
 379 /**
 380  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 381  *
 382  * @adev: amdgpu_device pointer
 383  * @vm: requested vm
 384  * @saddr: start of the address range
 385  * @eaddr: end of the address range
 386  *
 387  * Make sure the page directories and page tables are allocated
 388  */
 389 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 390                                   struct amdgpu_vm *vm,
 391                                   struct amdgpu_vm_pt *parent,
 392                                   uint64_t saddr, uint64_t eaddr,
 393                                   unsigned level, bool ats)
 394 {
 395         unsigned shift = amdgpu_vm_level_shift(adev, level);
 396         unsigned pt_idx, from, to;
 397         u64 flags;
 398         int r;
 399
 400         if (!parent->entries) {
 401                 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 402
 403                 parent->entries = kvmalloc_array(num_entries,
 404                                                    sizeof(struct amdgpu_vm_pt),
 405                                                    GFP_KERNEL | __GFP_ZERO);
 406                 if (!parent->entries)
 407                         return -ENOMEM;
 408                 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
 409         }
 410
 411         from = saddr >> shift;
 412         to = eaddr >> shift;
 413         if (from >= amdgpu_vm_num_entries(adev, level) ||
 414             to >= amdgpu_vm_num_entries(adev, level))
 415                 return -EINVAL;
 416
 417         ++level;
 418         saddr = saddr & ((1 << shift) - 1);
 419         eaddr = eaddr & ((1 << shift) - 1);
 420
 421         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 422         if (vm->use_cpu_for_update)
 423                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 424         else
 425                 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 426                                 AMDGPU_GEM_CREATE_SHADOW);
 427
 428         /* walk over the address space and allocate the page tables */
 429         for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 430                 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 431                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 432                 struct amdgpu_bo *pt;
 433
 434                 if (!entry->base.bo) {
 435                         struct amdgpu_bo_param bp;
 436
 437                         memset(&bp, 0, sizeof(bp));
 438                         bp.size = amdgpu_vm_bo_size(adev, level);
 439                         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
 440                         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 441                         bp.flags = flags;
 442                         bp.type = ttm_bo_type_kernel;
 443                         bp.resv = resv;
 444                         r = amdgpu_bo_create(adev, &bp, &pt);
 445                         if (r)
 446                                 return r;
 447
 448                         r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
 449                         if (r) {
 450                                 amdgpu_bo_unref(&pt->shadow);
 451                                 amdgpu_bo_unref(&pt);
 452                                 return r;
 453                         }
 454
 455                         if (vm->use_cpu_for_update) {
 456                                 r = amdgpu_bo_kmap(pt, NULL);
 457                                 if (r) {
 458                                         amdgpu_bo_unref(&pt->shadow);
 459                                         amdgpu_bo_unref(&pt);
 460                                         return r;
 461                                 }
 462                         }
 463
 464                         /* Keep a reference to the root directory to avoid
 465                         * freeing them up in the wrong order.
 466                         */
 467                         pt->parent = amdgpu_bo_ref(parent->base.bo);
 468
 469                         amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 470                         list_move(&entry->base.vm_status, &vm->relocated);
 471                 }
 472
 473                 if (level < AMDGPU_VM_PTB) {
 474                         uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 475                         uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 476                                 ((1 << shift) - 1);
 477                         r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
 478                                                    sub_eaddr, level, ats);
 479                         if (r)
 480                                 return r;
 481                 }
 482         }
 483
 484         return 0;
 485 }
 486
 487 /**
 488  * amdgpu_vm_alloc_pts - Allocate page tables.
 489  *
 490  * @adev: amdgpu_device pointer
 491  * @vm: VM to allocate page tables for
 492  * @saddr: Start address which needs to be allocated
 493  * @size: Size from start address we need.
 494  *
 495  * Make sure the page tables are allocated.
 496  */
 497 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 498                         struct amdgpu_vm *vm,
 499                         uint64_t saddr, uint64_t size)
 500 {
 501         uint64_t eaddr;
 502         bool ats = false;
 503
 504         /* validate the parameters */
 505         if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 506                 return -EINVAL;
 507
 508         eaddr = saddr + size - 1;
 509
 510         if (vm->pte_support_ats)
 511                 ats = saddr < AMDGPU_VA_HOLE_START;
 512
 513         saddr /= AMDGPU_GPU_PAGE_SIZE;
 514         eaddr /= AMDGPU_GPU_PAGE_SIZE;
 515
 516         if (eaddr >= adev->vm_manager.max_pfn) {
 517                 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
 518                         eaddr, adev->vm_manager.max_pfn);
 519                 return -EINVAL;
 520         }
 521
 522         return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
 523                                       adev->vm_manager.root_level, ats);
 524 }
 525
 526 /**
 527  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 528  *
 529  * @adev: amdgpu_device pointer
 530  */
 531 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 532 {
 533         const struct amdgpu_ip_block *ip_block;
 534         bool has_compute_vm_bug;
 535         struct amdgpu_ring *ring;
 536         int i;
 537
 538         has_compute_vm_bug = false;
 539
 540         ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 541         if (ip_block) {
 542                 /* Compute has a VM bug for GFX version < 7.
 543                    Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
 544                 if (ip_block->version->major <= 7)
 545                         has_compute_vm_bug = true;
 546                 else if (ip_block->version->major == 8)
 547                         if (adev->gfx.mec_fw_version < 673)
 548                                 has_compute_vm_bug = true;
 549         }
 550
 551         for (i = 0; i < adev->num_rings; i++) {
 552                 ring = adev->rings[i];
 553                 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
 554                         /* only compute rings */
 555                         ring->has_compute_vm_bug = has_compute_vm_bug;
 556                 else
 557                         ring->has_compute_vm_bug = false;
 558         }
 559 }
 560
 561 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 562                                   struct amdgpu_job *job)
 563 {
 564         struct amdgpu_device *adev = ring->adev;
 565         unsigned vmhub = ring->funcs->vmhub;
 566         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 567         struct amdgpu_vmid *id;
 568         bool gds_switch_needed;
 569         bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 570
 571         if (job->vmid == 0)
 572                 return false;
 573         id = &id_mgr->ids[job->vmid];
 574         gds_switch_needed = ring->funcs->emit_gds_switch && (
 575                 id->gds_base != job->gds_base ||
 576                 id->gds_size != job->gds_size ||
 577                 id->gws_base != job->gws_base ||
 578                 id->gws_size != job->gws_size ||
 579                 id->oa_base != job->oa_base ||
 580                 id->oa_size != job->oa_size);
 581
 582         if (amdgpu_vmid_had_gpu_reset(adev, id))
 583                 return true;
 584
 585         return vm_flush_needed || gds_switch_needed;
 586 }
 587
 588 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 589 {
 590         return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 591 }
 592
 593 /**
 594  * amdgpu_vm_flush - hardware flush the vm
 595  *
 596  * @ring: ring to use for flush
 597  * @vmid: vmid number to use
 598  * @pd_addr: address of the page directory
 599  *
 600  * Emit a VM flush when it is necessary.
 601  */
 602 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
 603 {
 604         struct amdgpu_device *adev = ring->adev;
 605         unsigned vmhub = ring->funcs->vmhub;
 606         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 607         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 608         bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 609                 id->gds_base != job->gds_base ||
 610                 id->gds_size != job->gds_size ||
 611                 id->gws_base != job->gws_base ||
 612                 id->gws_size != job->gws_size ||
 613                 id->oa_base != job->oa_base ||
 614                 id->oa_size != job->oa_size);
 615         bool vm_flush_needed = job->vm_needs_flush;
 616         bool pasid_mapping_needed = id->pasid != job->pasid ||
 617                 !id->pasid_mapping ||
 618                 !dma_fence_is_signaled(id->pasid_mapping);
 619         struct dma_fence *fence = NULL;
 620         unsigned patch_offset = 0;
 621         int r;
 622
 623         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 624                 gds_switch_needed = true;
 625                 vm_flush_needed = true;
 626                 pasid_mapping_needed = true;
 627         }
 628
 629         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
 630         vm_flush_needed &= !!ring->funcs->emit_vm_flush;
 631         pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 632                 ring->funcs->emit_wreg;
 633
 634         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 635                 return 0;
 636
 637         if (ring->funcs->init_cond_exec)
 638                 patch_offset = amdgpu_ring_init_cond_exec(ring);
 639
 640         if (need_pipe_sync)
 641                 amdgpu_ring_emit_pipeline_sync(ring);
 642
 643         if (vm_flush_needed) {
 644                 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 645                 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 646         }
 647
 648         if (pasid_mapping_needed)
 649                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 650
 651         if (vm_flush_needed || pasid_mapping_needed) {
 652                 r = amdgpu_fence_emit(ring, &fence, 0);
 653                 if (r)
 654                         return r;
 655         }
 656
 657         if (vm_flush_needed) {
 658                 mutex_lock(&id_mgr->lock);
 659                 dma_fence_put(id->last_flush);
 660                 id->last_flush = dma_fence_get(fence);
 661                 id->current_gpu_reset_count =
 662                         atomic_read(&adev->gpu_reset_counter);
 663                 mutex_unlock(&id_mgr->lock);
 664         }
 665
 666         if (pasid_mapping_needed) {
 667                 id->pasid = job->pasid;
 668                 dma_fence_put(id->pasid_mapping);
 669                 id->pasid_mapping = dma_fence_get(fence);
 670         }
 671         dma_fence_put(fence);
 672
 673         if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 674                 id->gds_base = job->gds_base;
 675                 id->gds_size = job->gds_size;
 676                 id->gws_base = job->gws_base;
 677                 id->gws_size = job->gws_size;
 678                 id->oa_base = job->oa_base;
 679                 id->oa_size = job->oa_size;
 680                 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
 681                                             job->gds_size, job->gws_base,
 682                                             job->gws_size, job->oa_base,
 683                                             job->oa_size);
 684         }
 685
 686         if (ring->funcs->patch_cond_exec)
 687                 amdgpu_ring_patch_cond_exec(ring, patch_offset);
 688
 689         /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
 690         if (ring->funcs->emit_switch_buffer) {
 691                 amdgpu_ring_emit_switch_buffer(ring);
 692                 amdgpu_ring_emit_switch_buffer(ring);
 693         }
 694         return 0;
 695 }
 696
 697 /**
 698  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 699  *
 700  * @vm: requested vm
 701  * @bo: requested buffer object
 702  *
 703  * Find @bo inside the requested vm.
 704  * Search inside the @bos vm list for the requested vm
 705  * Returns the found bo_va or NULL if none is found
 706  *
 707  * Object has to be reserved!
 708  */
 709 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 710                                        struct amdgpu_bo *bo)
 711 {
 712         struct amdgpu_bo_va *bo_va;
 713
 714         list_for_each_entry(bo_va, &bo->va, base.bo_list) {
 715                 if (bo_va->base.vm == vm) {
 716                         return bo_va;
 717                 }
 718         }
 719         return NULL;
 720 }
 721
 722 /**
 723  * amdgpu_vm_do_set_ptes - helper to call the right asic function
 724  *
 725  * @params: see amdgpu_pte_update_params definition
 726  * @bo: PD/PT to update
 727  * @pe: addr of the page entry
 728  * @addr: dst addr to write into pe
 729  * @count: number of page entries to update
 730  * @incr: increase next addr by incr bytes
 731  * @flags: hw access flags
 732  *
 733  * Traces the parameters and calls the right asic functions
 734  * to setup the page table using the DMA.
 735  */
 736 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
 737                                   struct amdgpu_bo *bo,
 738                                   uint64_t pe, uint64_t addr,
 739                                   unsigned count, uint32_t incr,
 740                                   uint64_t flags)
 741 {
 742         pe += amdgpu_bo_gpu_offset(bo);
 743         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 744
 745         if (count < 3) {
 746                 amdgpu_vm_write_pte(params->adev, params->ib, pe,
 747                                     addr | flags, count, incr);
 748
 749         } else {
 750                 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
 751                                       count, incr, flags);
 752         }
 753 }
 754
 755 /**
 756  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 757  *
 758  * @params: see amdgpu_pte_update_params definition
 759  * @bo: PD/PT to update
 760  * @pe: addr of the page entry
 761  * @addr: dst addr to write into pe
 762  * @count: number of page entries to update
 763  * @incr: increase next addr by incr bytes
 764  * @flags: hw access flags
 765  *
 766  * Traces the parameters and calls the DMA function to copy the PTEs.
 767  */
 768 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 769                                    struct amdgpu_bo *bo,
 770                                    uint64_t pe, uint64_t addr,
 771                                    unsigned count, uint32_t incr,
 772                                    uint64_t flags)
 773 {
 774         uint64_t src = (params->src + (addr >> 12) * 8);
 775
 776         pe += amdgpu_bo_gpu_offset(bo);
 777         trace_amdgpu_vm_copy_ptes(pe, src, count);
 778
 779         amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
 780 }
 781
 782 /**
 783  * amdgpu_vm_map_gart - Resolve gart mapping of addr
 784  *
 785  * @pages_addr: optional DMA address to use for lookup
 786  * @addr: the unmapped addr
 787  *
 788  * Look up the physical address of the page that the pte resolves
 789  * to and return the pointer for the page table entry.
 790  */
 791 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 792 {
 793         uint64_t result;
 794
 795         /* page table offset */
 796         result = pages_addr[addr >> PAGE_SHIFT];
 797
 798         /* in case cpu page size != gpu page size*/
 799         result |= addr & (~PAGE_MASK);
 800
 801         result &= 0xFFFFFFFFFFFFF000ULL;
 802
 803         return result;
 804 }
 805
 806 /**
 807  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 808  *
 809  * @params: see amdgpu_pte_update_params definition
 810  * @bo: PD/PT to update
 811  * @pe: kmap addr of the page entry
 812  * @addr: dst addr to write into pe
 813  * @count: number of page entries to update
 814  * @incr: increase next addr by incr bytes
 815  * @flags: hw access flags
 816  *
 817  * Write count number of PT/PD entries directly.
 818  */
 819 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 820                                    struct amdgpu_bo *bo,
 821                                    uint64_t pe, uint64_t addr,
 822                                    unsigned count, uint32_t incr,
 823                                    uint64_t flags)
 824 {
 825         unsigned int i;
 826         uint64_t value;
 827
 828         pe += (unsigned long)amdgpu_bo_kptr(bo);
 829
 830         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 831
 832         for (i = 0; i < count; i++) {
 833                 value = params->pages_addr ?
 834                         amdgpu_vm_map_gart(params->pages_addr, addr) :
 835                         addr;
 836                 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
 837                                        i, value, flags);
 838                 addr += incr;
 839         }
 840 }
 841
 842 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 843                              void *owner)
 844 {
 845         struct amdgpu_sync sync;
 846         int r;
 847
 848         amdgpu_sync_create(&sync);
 849         amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
 850         r = amdgpu_sync_wait(&sync, true);
 851         amdgpu_sync_free(&sync);
 852
 853         return r;
 854 }
 855
 856 /*
 857  * amdgpu_vm_update_pde - update a single level in the hierarchy
 858  *
 859  * @param: parameters for the update
 860  * @vm: requested vm
 861  * @parent: parent directory
 862  * @entry: entry to update
 863  *
 864  * Makes sure the requested entry in parent is up to date.
 865  */
 866 static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 867                                  struct amdgpu_vm *vm,
 868                                  struct amdgpu_vm_pt *parent,
 869                                  struct amdgpu_vm_pt *entry)
 870 {
 871         struct amdgpu_bo *bo = parent->base.bo, *pbo;
 872         uint64_t pde, pt, flags;
 873         unsigned level;
 874
 875         /* Don't update huge pages here */
 876         if (entry->huge)
 877                 return;
 878
 879         for (level = 0, pbo = bo->parent; pbo; ++level)
 880                 pbo = pbo->parent;
 881
 882         level += params->adev->vm_manager.root_level;
 883         pt = amdgpu_bo_gpu_offset(entry->base.bo);
 884         flags = AMDGPU_PTE_VALID;
 885         amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
 886         pde = (entry - parent->entries) * 8;
 887         if (bo->shadow)
 888                 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
 889         params->func(params, bo, pde, pt, 1, 0, flags);
 890 }
 891
 892 /*
 893  * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 894  *
 895  * @parent: parent PD
 896  *
 897  * Mark all PD level as invalid after an error.
 898  */
 899 static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 900                                        struct amdgpu_vm *vm,
 901                                        struct amdgpu_vm_pt *parent,
 902                                        unsigned level)
 903 {
 904         unsigned pt_idx, num_entries;
 905
 906         /*
 907          * Recurse into the subdirectories. This recursion is harmless because
 908          * we only have a maximum of 5 layers.
 909          */
 910         num_entries = amdgpu_vm_num_entries(adev, level);
 911         for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 912                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 913
 914                 if (!entry->base.bo)
 915                         continue;
 916
 917                 if (list_empty(&entry->base.vm_status))
 918                         list_add(&entry->base.vm_status, &vm->relocated);
 919                 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 920         }
 921 }
 922
 923 /*
 924  * amdgpu_vm_update_directories - make sure that all directories are valid
 925  *
 926  * @adev: amdgpu_device pointer
 927  * @vm: requested vm
 928  *
 929  * Makes sure all directories are up to date.
 930  * Returns 0 for success, error for failure.
 931  */
 932 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 933                                  struct amdgpu_vm *vm)
 934 {
 935         struct amdgpu_pte_update_params params;
 936         struct amdgpu_job *job;
 937         unsigned ndw = 0;
 938         int r = 0;
 939
 940         if (list_empty(&vm->relocated))
 941                 return 0;
 942
 943 restart:
 944         memset(&params, 0, sizeof(params));
 945         params.adev = adev;
 946
 947         if (vm->use_cpu_for_update) {
 948                 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 949                 if (unlikely(r))
 950                         return r;
 951
 952                 params.func = amdgpu_vm_cpu_set_ptes;
 953         } else {
 954                 ndw = 512 * 8;
 955                 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 956                 if (r)
 957                         return r;
 958
 959                 params.ib = &job->ibs[0];
 960                 params.func = amdgpu_vm_do_set_ptes;
 961         }
 962
 963         while (!list_empty(&vm->relocated)) {
 964                 struct amdgpu_vm_bo_base *bo_base, *parent;
 965                 struct amdgpu_vm_pt *pt, *entry;
 966                 struct amdgpu_bo *bo;
 967
 968                 bo_base = list_first_entry(&vm->relocated,
 969                                            struct amdgpu_vm_bo_base,
 970                                            vm_status);
 971                 list_del_init(&bo_base->vm_status);
 972
 973                 bo = bo_base->bo->parent;
 974                 if (!bo)
 975                         continue;
 976
 977                 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
 978                                           bo_list);
 979                 pt = container_of(parent, struct amdgpu_vm_pt, base);
 980                 entry = container_of(bo_base, struct amdgpu_vm_pt, base);
 981
 982                 amdgpu_vm_update_pde(&params, vm, pt, entry);
 983
 984                 if (!vm->use_cpu_for_update &&
 985                     (ndw - params.ib->length_dw) < 32)
 986                         break;
 987         }
 988
 989         if (vm->use_cpu_for_update) {
 990                 /* Flush HDP */
 991                 mb();
 992                 amdgpu_asic_flush_hdp(adev, NULL);
 993         } else if (params.ib->length_dw == 0) {
 994                 amdgpu_job_free(job);
 995         } else {
 996                 struct amdgpu_bo *root = vm->root.base.bo;
 997                 struct amdgpu_ring *ring;
 998                 struct dma_fence *fence;
 999
1000                 ring = container_of(vm->entity.sched, struct amdgpu_ring,
1001                                     sched);
1002
1003                 amdgpu_ring_pad_ib(ring, params.ib);
1004                 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1005                                  AMDGPU_FENCE_OWNER_VM, false);
1006                 WARN_ON(params.ib->length_dw > ndw);
1007                 r = amdgpu_job_submit(job, ring, &vm->entity,
1008                                       AMDGPU_FENCE_OWNER_VM, &fence);
1009                 if (r)
1010                         goto error;
1011
1012                 amdgpu_bo_fence(root, fence, true);
1013                 dma_fence_put(vm->last_update);
1014                 vm->last_update = fence;
1015         }
1016
1017         if (!list_empty(&vm->relocated))
1018                 goto restart;
1019
1020         return 0;
1021
1022 error:
1023         amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1024                                    adev->vm_manager.root_level);
1025         amdgpu_job_free(job);
1026         return r;
1027 }
1028
1029 /**
1030  * amdgpu_vm_find_entry - find the entry for an address
1031  *
1032  * @p: see amdgpu_pte_update_params definition
1033  * @addr: virtual address in question
1034  * @entry: resulting entry or NULL
1035  * @parent: parent entry
1036  *
1037  * Find the vm_pt entry and it's parent for the given address.
1038  */
1039 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1040                          struct amdgpu_vm_pt **entry,
1041                          struct amdgpu_vm_pt **parent)
1042 {
1043         unsigned level = p->adev->vm_manager.root_level;
1044
1045         *parent = NULL;
1046         *entry = &p->vm->root;
1047         while ((*entry)->entries) {
1048                 unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1049
1050                 *parent = *entry;
1051                 *entry = &(*entry)->entries[addr >> shift];
1052                 addr &= (1ULL << shift) - 1;
1053         }
1054
1055         if (level != AMDGPU_VM_PTB)
1056                 *entry = NULL;
1057 }
1058
1059 /**
1060  * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1061  *
1062  * @p: see amdgpu_pte_update_params definition
1063  * @entry: vm_pt entry to check
1064  * @parent: parent entry
1065  * @nptes: number of PTEs updated with this operation
1066  * @dst: destination address where the PTEs should point to
1067  * @flags: access flags fro the PTEs
1068  *
1069  * Check if we can update the PD with a huge page.
1070  */
1071 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1072                                         struct amdgpu_vm_pt *entry,
1073                                         struct amdgpu_vm_pt *parent,
1074                                         unsigned nptes, uint64_t dst,
1075                                         uint64_t flags)
1076 {
1077         uint64_t pde;
1078
1079         /* In the case of a mixed PT the PDE must point to it*/
1080         if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1081             nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1082                 /* Set the huge page flag to stop scanning at this PDE */
1083                 flags |= AMDGPU_PDE_PTE;
1084         }
1085
1086         if (!(flags & AMDGPU_PDE_PTE)) {
1087                 if (entry->huge) {
1088                         /* Add the entry to the relocated list to update it. */
1089                         entry->huge = false;
1090                         list_move(&entry->base.vm_status, &p->vm->relocated);
1091                 }
1092                 return;
1093         }
1094
1095         entry->huge = true;
1096         amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1097
1098         pde = (entry - parent->entries) * 8;
1099         if (parent->base.bo->shadow)
1100                 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1101         p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1102 }
1103
1104 /**
1105  * amdgpu_vm_update_ptes - make sure that page tables are valid
1106  *
1107  * @params: see amdgpu_pte_update_params definition
1108  * @vm: requested vm
1109  * @start: start of GPU address range
1110  * @end: end of GPU address range
1111  * @dst: destination address to map to, the next dst inside the function
1112  * @flags: mapping flags
1113  *
1114  * Update the page tables in the range @start - @end.
1115  * Returns 0 for success, -EINVAL for failure.
1116  */
1117 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1118                                   uint64_t start, uint64_t end,
1119                                   uint64_t dst, uint64_t flags)
1120 {
1121         struct amdgpu_device *adev = params->adev;
1122         const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1123
1124         uint64_t addr, pe_start;
1125         struct amdgpu_bo *pt;
1126         unsigned nptes;
1127
1128         /* walk over the address space and update the page tables */
1129         for (addr = start; addr < end; addr += nptes,
1130              dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1131                 struct amdgpu_vm_pt *entry, *parent;
1132
1133                 amdgpu_vm_get_entry(params, addr, &entry, &parent);
1134                 if (!entry)
1135                         return -ENOENT;
1136
1137                 if ((addr & ~mask) == (end & ~mask))
1138                         nptes = end - addr;
1139                 else
1140                         nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1141
1142                 amdgpu_vm_handle_huge_pages(params, entry, parent,
1143                                             nptes, dst, flags);
1144                 /* We don't need to update PTEs for huge pages */
1145                 if (entry->huge)
1146                         continue;
1147
1148                 pt = entry->base.bo;
1149                 pe_start = (addr & mask) * 8;
1150                 if (pt->shadow)
1151                         params->func(params, pt->shadow, pe_start, dst, nptes,
1152                                      AMDGPU_GPU_PAGE_SIZE, flags);
1153                 params->func(params, pt, pe_start, dst, nptes,
1154                              AMDGPU_GPU_PAGE_SIZE, flags);
1155         }
1156
1157         return 0;
1158 }
1159
1160 /*
1161  * amdgpu_vm_frag_ptes - add fragment information to PTEs
1162  *
1163  * @params: see amdgpu_pte_update_params definition
1164  * @vm: requested vm
1165  * @start: first PTE to handle
1166  * @end: last PTE to handle
1167  * @dst: addr those PTEs should point to
1168  * @flags: hw mapping flags
1169  * Returns 0 for success, -EINVAL for failure.
1170  */
1171 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
1172                                 uint64_t start, uint64_t end,
1173                                 uint64_t dst, uint64_t flags)
1174 {
1175         /**
1176          * The MC L1 TLB supports variable sized pages, based on a fragment
1177          * field in the PTE. When this field is set to a non-zero value, page
1178          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1179          * flags are considered valid for all PTEs within the fragment range
1180          * and corresponding mappings are assumed to be physically contiguous.
1181          *
1182          * The L1 TLB can store a single PTE for the whole fragment,
1183          * significantly increasing the space available for translation
1184          * caching. This leads to large improvements in throughput when the
1185          * TLB is under pressure.
1186          *
1187          * The L2 TLB distributes small and large fragments into two
1188          * asymmetric partitions. The large fragment cache is significantly
1189          * larger. Thus, we try to use large fragments wherever possible.
1190          * Userspace can support this by aligning virtual base address and
1191          * allocation size to the fragment size.
1192          */
1193         unsigned max_frag = params->adev->vm_manager.fragment_size;
1194         int r;
1195
1196         /* system pages are non continuously */
1197         if (params->src || !(flags & AMDGPU_PTE_VALID))
1198                 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1199
1200         while (start != end) {
1201                 uint64_t frag_flags, frag_end;
1202                 unsigned frag;
1203
1204                 /* This intentionally wraps around if no bit is set */
1205                 frag = min((unsigned)ffs(start) - 1,
1206                            (unsigned)fls64(end - start) - 1);
1207                 if (frag >= max_frag) {
1208                         frag_flags = AMDGPU_PTE_FRAG(max_frag);
1209                         frag_end = end & ~((1ULL << max_frag) - 1);
1210                 } else {
1211                         frag_flags = AMDGPU_PTE_FRAG(frag);
1212                         frag_end = start + (1 << frag);
1213                 }
1214
1215                 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1216                                           flags | frag_flags);
1217                 if (r)
1218                         return r;
1219
1220                 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1221                 start = frag_end;
1222         }
1223
1224         return 0;
1225 }
1226
1227 /**
1228  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1229  *
1230  * @adev: amdgpu_device pointer
1231  * @exclusive: fence we need to sync to
1232  * @pages_addr: DMA addresses to use for mapping
1233  * @vm: requested vm
1234  * @start: start of mapped range
1235  * @last: last mapped entry
1236  * @flags: flags for the entries
1237  * @addr: addr to set the area to
1238  * @fence: optional resulting fence
1239  *
1240  * Fill in the page table entries between @start and @last.
1241  * Returns 0 for success, -EINVAL for failure.
1242  */
1243 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1244                                        struct dma_fence *exclusive,
1245                                        dma_addr_t *pages_addr,
1246                                        struct amdgpu_vm *vm,
1247                                        uint64_t start, uint64_t last,
1248                                        uint64_t flags, uint64_t addr,
1249                                        struct dma_fence **fence)
1250 {
1251         struct amdgpu_ring *ring;
1252         void *owner = AMDGPU_FENCE_OWNER_VM;
1253         unsigned nptes, ncmds, ndw;
1254         struct amdgpu_job *job;
1255         struct amdgpu_pte_update_params params;
1256         struct dma_fence *f = NULL;
1257         int r;
1258
1259         memset(&params, 0, sizeof(params));
1260         params.adev = adev;
1261         params.vm = vm;
1262
1263         /* sync to everything on unmapping */
1264         if (!(flags & AMDGPU_PTE_VALID))
1265                 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1266
1267         if (vm->use_cpu_for_update) {
1268                 /* params.src is used as flag to indicate system Memory */
1269                 if (pages_addr)
1270                         params.src = ~0;
1271
1272                 /* Wait for PT BOs to be free. PTs share the same resv. object
1273                  * as the root PD BO
1274                  */
1275                 r = amdgpu_vm_wait_pd(adev, vm, owner);
1276                 if (unlikely(r))
1277                         return r;
1278
1279                 params.func = amdgpu_vm_cpu_set_ptes;
1280                 params.pages_addr = pages_addr;
1281                 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1282                                            addr, flags);
1283         }
1284
1285         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1286
1287         nptes = last - start + 1;
1288
1289         /*
1290          * reserve space for two commands every (1 << BLOCK_SIZE)
1291          *  entries or 2k dwords (whatever is smaller)
1292          *
1293          * The second command is for the shadow pagetables.
1294          */
1295         if (vm->root.base.bo->shadow)
1296                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1297         else
1298                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1299
1300         /* padding, etc. */
1301         ndw = 64;
1302
1303         if (pages_addr) {
1304                 /* copy commands needed */
1305                 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1306
1307                 /* and also PTEs */
1308                 ndw += nptes * 2;
1309
1310                 params.func = amdgpu_vm_do_copy_ptes;
1311
1312         } else {
1313                 /* set page commands needed */
1314                 ndw += ncmds * 10;
1315
1316                 /* extra commands for begin/end fragments */
1317                 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1318
1319                 params.func = amdgpu_vm_do_set_ptes;
1320         }
1321
1322         r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1323         if (r)
1324                 return r;
1325
1326         params.ib = &job->ibs[0];
1327
1328         if (pages_addr) {
1329                 uint64_t *pte;
1330                 unsigned i;
1331
1332                 /* Put the PTEs at the end of the IB. */
1333                 i = ndw - nptes * 2;
1334                 pte= (uint64_t *)&(job->ibs->ptr[i]);
1335                 params.src = job->ibs->gpu_addr + i * 4;
1336
1337                 for (i = 0; i < nptes; ++i) {
1338                         pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1339                                                     AMDGPU_GPU_PAGE_SIZE);
1340                         pte[i] |= flags;
1341                 }
1342                 addr = 0;
1343         }
1344
1345         r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1346         if (r)
1347                 goto error_free;
1348
1349         r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1350                              owner, false);
1351         if (r)
1352                 goto error_free;
1353
1354         r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1355         if (r)
1356                 goto error_free;
1357
1358         r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1359         if (r)
1360                 goto error_free;
1361
1362         amdgpu_ring_pad_ib(ring, params.ib);
1363         WARN_ON(params.ib->length_dw > ndw);
1364         r = amdgpu_job_submit(job, ring, &vm->entity,
1365                               AMDGPU_FENCE_OWNER_VM, &f);
1366         if (r)
1367                 goto error_free;
1368
1369         amdgpu_bo_fence(vm->root.base.bo, f, true);
1370         dma_fence_put(*fence);
1371         *fence = f;
1372         return 0;
1373
1374 error_free:
1375         amdgpu_job_free(job);
1376         return r;
1377 }
1378
1379 /**
1380  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1381  *
1382  * @adev: amdgpu_device pointer
1383  * @exclusive: fence we need to sync to
1384  * @pages_addr: DMA addresses to use for mapping
1385  * @vm: requested vm
1386  * @mapping: mapped range and flags to use for the update
1387  * @flags: HW flags for the mapping
1388  * @nodes: array of drm_mm_nodes with the MC addresses
1389  * @fence: optional resulting fence
1390  *
1391  * Split the mapping into smaller chunks so that each update fits
1392  * into a SDMA IB.
1393  * Returns 0 for success, -EINVAL for failure.
1394  */
1395 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1396                                       struct dma_fence *exclusive,
1397                                       dma_addr_t *pages_addr,
1398                                       struct amdgpu_vm *vm,
1399                                       struct amdgpu_bo_va_mapping *mapping,
1400                                       uint64_t flags,
1401                                       struct drm_mm_node *nodes,
1402                                       struct dma_fence **fence)
1403 {
1404         unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
1405         uint64_t pfn, start = mapping->start;
1406         int r;
1407
1408         /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
1409          * but in case of something, we filter the flags in first place
1410          */
1411         if (!(mapping->flags & AMDGPU_PTE_READABLE))
1412                 flags &= ~AMDGPU_PTE_READABLE;
1413         if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1414                 flags &= ~AMDGPU_PTE_WRITEABLE;
1415
1416         flags &= ~AMDGPU_PTE_EXECUTABLE;
1417         flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1418
1419         flags &= ~AMDGPU_PTE_MTYPE_MASK;
1420         flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1421
1422         if ((mapping->flags & AMDGPU_PTE_PRT) &&
1423             (adev->asic_type >= CHIP_VEGA10)) {
1424                 flags |= AMDGPU_PTE_PRT;
1425                 flags &= ~AMDGPU_PTE_VALID;
1426         }
1427
1428         trace_amdgpu_vm_bo_update(mapping);
1429
1430         pfn = mapping->offset >> PAGE_SHIFT;
1431         if (nodes) {
1432                 while (pfn >= nodes->size) {
1433                         pfn -= nodes->size;
1434                         ++nodes;
1435                 }
1436         }
1437
1438         do {
1439                 dma_addr_t *dma_addr = NULL;
1440                 uint64_t max_entries;
1441                 uint64_t addr, last;
1442
1443                 if (nodes) {
1444                         addr = nodes->start << PAGE_SHIFT;
1445                         max_entries = (nodes->size - pfn) *
1446                                 (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1447                 } else {
1448                         addr = 0;
1449                         max_entries = S64_MAX;
1450                 }
1451
1452                 if (pages_addr) {
1453                         uint64_t count;
1454
1455                         max_entries = min(max_entries, 16ull * 1024ull);
1456                         for (count = 1; count < max_entries; ++count) {
1457                                 uint64_t idx = pfn + count;
1458
1459                                 if (pages_addr[idx] !=
1460                                     (pages_addr[idx - 1] + PAGE_SIZE))
1461                                         break;
1462                         }
1463
1464                         if (count < min_linear_pages) {
1465                                 addr = pfn << PAGE_SHIFT;
1466                                 dma_addr = pages_addr;
1467                         } else {
1468                                 addr = pages_addr[pfn];
1469                                 max_entries = count;
1470                         }
1471
1472                 } else if (flags & AMDGPU_PTE_VALID) {
1473                         addr += adev->vm_manager.vram_base_offset;
1474                         addr += pfn << PAGE_SHIFT;
1475                 }
1476
1477                 last = min((uint64_t)mapping->last, start + max_entries - 1);
1478                 r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1479                                                 start, last, flags, addr,
1480                                                 fence);
1481                 if (r)
1482                         return r;
1483
1484                 pfn += last - start + 1;
1485                 if (nodes && nodes->size == pfn) {
1486                         pfn = 0;
1487                         ++nodes;
1488                 }
1489                 start = last + 1;
1490
1491         } while (unlikely(start != mapping->last + 1));
1492
1493         return 0;
1494 }
1495
1496 /**
1497  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1498  *
1499  * @adev: amdgpu_device pointer
1500  * @bo_va: requested BO and VM object
1501  * @clear: if true clear the entries
1502  *
1503  * Fill in the page table entries for @bo_va.
1504  * Returns 0 for success, -EINVAL for failure.
1505  */
1506 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1507                         struct amdgpu_bo_va *bo_va,
1508                         bool clear)
1509 {
1510         struct amdgpu_bo *bo = bo_va->base.bo;
1511         struct amdgpu_vm *vm = bo_va->base.vm;
1512         struct amdgpu_bo_va_mapping *mapping;
1513         dma_addr_t *pages_addr = NULL;
1514         struct ttm_mem_reg *mem;
1515         struct drm_mm_node *nodes;
1516         struct dma_fence *exclusive, **last_update;
1517         uint64_t flags;
1518         int r;
1519
1520         if (clear || !bo_va->base.bo) {
1521                 mem = NULL;
1522                 nodes = NULL;
1523                 exclusive = NULL;
1524         } else {
1525                 struct ttm_dma_tt *ttm;
1526
1527                 mem = &bo_va->base.bo->tbo.mem;
1528                 nodes = mem->mm_node;
1529                 if (mem->mem_type == TTM_PL_TT) {
1530                         ttm = container_of(bo_va->base.bo->tbo.ttm,
1531                                            struct ttm_dma_tt, ttm);
1532                         pages_addr = ttm->dma_address;
1533                 }
1534                 exclusive = reservation_object_get_excl(bo->tbo.resv);
1535         }
1536
1537         if (bo)
1538                 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1539         else
1540                 flags = 0x0;
1541
1542         if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1543                 last_update = &vm->last_update;
1544         else
1545                 last_update = &bo_va->last_pt_update;
1546
1547         if (!clear && bo_va->base.moved) {
1548                 bo_va->base.moved = false;
1549                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1550
1551         } else if (bo_va->cleared != clear) {
1552                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1553         }
1554
1555         list_for_each_entry(mapping, &bo_va->invalids, list) {
1556                 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1557                                                mapping, flags, nodes,
1558                                                last_update);
1559                 if (r)
1560                         return r;
1561         }
1562
1563         if (vm->use_cpu_for_update) {
1564                 /* Flush HDP */
1565                 mb();
1566                 amdgpu_asic_flush_hdp(adev, NULL);
1567         }
1568
1569         spin_lock(&vm->moved_lock);
1570         list_del_init(&bo_va->base.vm_status);
1571         spin_unlock(&vm->moved_lock);
1572
1573         /* If the BO is not in its preferred location add it back to
1574          * the evicted list so that it gets validated again on the
1575          * next command submission.
1576          */
1577         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1578             !(bo->preferred_domains &
1579             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)))
1580                 list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1581
1582         list_splice_init(&bo_va->invalids, &bo_va->valids);
1583         bo_va->cleared = clear;
1584
1585         if (trace_amdgpu_vm_bo_mapping_enabled()) {
1586                 list_for_each_entry(mapping, &bo_va->valids, list)
1587                         trace_amdgpu_vm_bo_mapping(mapping);
1588         }
1589
1590         return 0;
1591 }
1592
1593 /**
1594  * amdgpu_vm_update_prt_state - update the global PRT state
1595  */
1596 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1597 {
1598         unsigned long flags;
1599         bool enable;
1600
1601         spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1602         enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1603         adev->gmc.gmc_funcs->set_prt(adev, enable);
1604         spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1605 }
1606
1607 /**
1608  * amdgpu_vm_prt_get - add a PRT user
1609  */
1610 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1611 {
1612         if (!adev->gmc.gmc_funcs->set_prt)
1613                 return;
1614
1615         if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1616                 amdgpu_vm_update_prt_state(adev);
1617 }
1618
1619 /**
1620  * amdgpu_vm_prt_put - drop a PRT user
1621  */
1622 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1623 {
1624         if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1625                 amdgpu_vm_update_prt_state(adev);
1626 }
1627
1628 /**
1629  * amdgpu_vm_prt_cb - callback for updating the PRT status
1630  */
1631 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1632 {
1633         struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1634
1635         amdgpu_vm_prt_put(cb->adev);
1636         kfree(cb);
1637 }
1638
1639 /**
1640  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1641  */
1642 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1643                                  struct dma_fence *fence)
1644 {
1645         struct amdgpu_prt_cb *cb;
1646
1647         if (!adev->gmc.gmc_funcs->set_prt)
1648                 return;
1649
1650         cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1651         if (!cb) {
1652                 /* Last resort when we are OOM */
1653                 if (fence)
1654                         dma_fence_wait(fence, false);
1655
1656                 amdgpu_vm_prt_put(adev);
1657         } else {
1658                 cb->adev = adev;
1659                 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1660                                                      amdgpu_vm_prt_cb))
1661                         amdgpu_vm_prt_cb(fence, &cb->cb);
1662         }
1663 }
1664
1665 /**
1666  * amdgpu_vm_free_mapping - free a mapping
1667  *
1668  * @adev: amdgpu_device pointer
1669  * @vm: requested vm
1670  * @mapping: mapping to be freed
1671  * @fence: fence of the unmap operation
1672  *
1673  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1674  */
1675 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1676                                    struct amdgpu_vm *vm,
1677                                    struct amdgpu_bo_va_mapping *mapping,
1678                                    struct dma_fence *fence)
1679 {
1680         if (mapping->flags & AMDGPU_PTE_PRT)
1681                 amdgpu_vm_add_prt_cb(adev, fence);
1682         kfree(mapping);
1683 }
1684
1685 /**
1686  * amdgpu_vm_prt_fini - finish all prt mappings
1687  *
1688  * @adev: amdgpu_device pointer
1689  * @vm: requested vm
1690  *
1691  * Register a cleanup callback to disable PRT support after VM dies.
1692  */
1693 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1694 {
1695         struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1696         struct dma_fence *excl, **shared;
1697         unsigned i, shared_count;
1698         int r;
1699
1700         r = reservation_object_get_fences_rcu(resv, &excl,
1701                                               &shared_count, &shared);
1702         if (r) {
1703                 /* Not enough memory to grab the fence list, as last resort
1704                  * block for all the fences to complete.
1705                  */
1706                 reservation_object_wait_timeout_rcu(resv, true, false,
1707                                                     MAX_SCHEDULE_TIMEOUT);
1708                 return;
1709         }
1710
1711         /* Add a callback for each fence in the reservation object */
1712         amdgpu_vm_prt_get(adev);
1713         amdgpu_vm_add_prt_cb(adev, excl);
1714
1715         for (i = 0; i < shared_count; ++i) {
1716                 amdgpu_vm_prt_get(adev);
1717                 amdgpu_vm_add_prt_cb(adev, shared[i]);
1718         }
1719
1720         kfree(shared);
1721 }
1722
1723 /**
1724  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1725  *
1726  * @adev: amdgpu_device pointer
1727  * @vm: requested vm
1728  * @fence: optional resulting fence (unchanged if no work needed to be done
1729  * or if an error occurred)
1730  *
1731  * Make sure all freed BOs are cleared in the PT.
1732  * Returns 0 for success.
1733  *
1734  * PTs have to be reserved and mutex must be locked!
1735  */
1736 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1737                           struct amdgpu_vm *vm,
1738                           struct dma_fence **fence)
1739 {
1740         struct amdgpu_bo_va_mapping *mapping;
1741         uint64_t init_pte_value = 0;
1742         struct dma_fence *f = NULL;
1743         int r;
1744
1745         while (!list_empty(&vm->freed)) {
1746                 mapping = list_first_entry(&vm->freed,
1747                         struct amdgpu_bo_va_mapping, list);
1748                 list_del(&mapping->list);
1749
1750                 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1751                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1752
1753                 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
1754                                                 mapping->start, mapping->last,
1755                                                 init_pte_value, 0, &f);
1756                 amdgpu_vm_free_mapping(adev, vm, mapping, f);
1757                 if (r) {
1758                         dma_fence_put(f);
1759                         return r;
1760                 }
1761         }
1762
1763         if (fence && f) {
1764                 dma_fence_put(*fence);
1765                 *fence = f;
1766         } else {
1767                 dma_fence_put(f);
1768         }
1769
1770         return 0;
1771
1772 }
1773
1774 /**
1775  * amdgpu_vm_handle_moved - handle moved BOs in the PT
1776  *
1777  * @adev: amdgpu_device pointer
1778  * @vm: requested vm
1779  * @sync: sync object to add fences to
1780  *
1781  * Make sure all BOs which are moved are updated in the PTs.
1782  * Returns 0 for success.
1783  *
1784  * PTs have to be reserved!
1785  */
1786 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1787                            struct amdgpu_vm *vm)
1788 {
1789         bool clear;
1790         int r = 0;
1791
1792         spin_lock(&vm->moved_lock);
1793         while (!list_empty(&vm->moved)) {
1794                 struct amdgpu_bo_va *bo_va;
1795                 struct reservation_object *resv;
1796
1797                 bo_va = list_first_entry(&vm->moved,
1798                         struct amdgpu_bo_va, base.vm_status);
1799                 spin_unlock(&vm->moved_lock);
1800
1801                 resv = bo_va->base.bo->tbo.resv;
1802
1803                 /* Per VM BOs never need to bo cleared in the page tables */
1804                 if (resv == vm->root.base.bo->tbo.resv)
1805                         clear = false;
1806                 /* Try to reserve the BO to avoid clearing its ptes */
1807                 else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
1808                         clear = false;
1809                 /* Somebody else is using the BO right now */
1810                 else
1811                         clear = true;
1812
1813                 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1814                 if (r)
1815                         return r;
1816
1817                 if (!clear && resv != vm->root.base.bo->tbo.resv)
1818                         reservation_object_unlock(resv);
1819
1820                 spin_lock(&vm->moved_lock);
1821         }
1822         spin_unlock(&vm->moved_lock);
1823
1824         return r;
1825 }
1826
1827 /**
1828  * amdgpu_vm_bo_add - add a bo to a specific vm
1829  *
1830  * @adev: amdgpu_device pointer
1831  * @vm: requested vm
1832  * @bo: amdgpu buffer object
1833  *
1834  * Add @bo into the requested vm.
1835  * Add @bo to the list of bos associated with the vm
1836  * Returns newly added bo_va or NULL for failure
1837  *
1838  * Object has to be reserved!
1839  */
1840 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1841                                       struct amdgpu_vm *vm,
1842                                       struct amdgpu_bo *bo)
1843 {
1844         struct amdgpu_bo_va *bo_va;
1845
1846         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1847         if (bo_va == NULL) {
1848                 return NULL;
1849         }
1850         amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1851
1852         bo_va->ref_count = 1;
1853         INIT_LIST_HEAD(&bo_va->valids);
1854         INIT_LIST_HEAD(&bo_va->invalids);
1855
1856         return bo_va;
1857 }
1858
1859
1860 /**
1861  * amdgpu_vm_bo_insert_mapping - insert a new mapping
1862  *
1863  * @adev: amdgpu_device pointer
1864  * @bo_va: bo_va to store the address
1865  * @mapping: the mapping to insert
1866  *
1867  * Insert a new mapping into all structures.
1868  */
1869 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1870                                     struct amdgpu_bo_va *bo_va,
1871                                     struct amdgpu_bo_va_mapping *mapping)
1872 {
1873         struct amdgpu_vm *vm = bo_va->base.vm;
1874         struct amdgpu_bo *bo = bo_va->base.bo;
1875
1876         mapping->bo_va = bo_va;
1877         list_add(&mapping->list, &bo_va->invalids);
1878         amdgpu_vm_it_insert(mapping, &vm->va);
1879
1880         if (mapping->flags & AMDGPU_PTE_PRT)
1881                 amdgpu_vm_prt_get(adev);
1882
1883         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1884                 spin_lock(&vm->moved_lock);
1885                 if (list_empty(&bo_va->base.vm_status))
1886                         list_add(&bo_va->base.vm_status, &vm->moved);
1887                 spin_unlock(&vm->moved_lock);
1888         }
1889         trace_amdgpu_vm_bo_map(bo_va, mapping);
1890 }
1891
1892 /**
1893  * amdgpu_vm_bo_map - map bo inside a vm
1894  *
1895  * @adev: amdgpu_device pointer
1896  * @bo_va: bo_va to store the address
1897  * @saddr: where to map the BO
1898  * @offset: requested offset in the BO
1899  * @flags: attributes of pages (read/write/valid/etc.)
1900  *
1901  * Add a mapping of the BO at the specefied addr into the VM.
1902  * Returns 0 for success, error for failure.
1903  *
1904  * Object has to be reserved and unreserved outside!
1905  */
1906 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1907                      struct amdgpu_bo_va *bo_va,
1908                      uint64_t saddr, uint64_t offset,
1909                      uint64_t size, uint64_t flags)
1910 {
1911         struct amdgpu_bo_va_mapping *mapping, *tmp;
1912         struct amdgpu_bo *bo = bo_va->base.bo;
1913         struct amdgpu_vm *vm = bo_va->base.vm;
1914         uint64_t eaddr;
1915
1916         /* validate the parameters */
1917         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1918             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1919                 return -EINVAL;
1920
1921         /* make sure object fit at this offset */
1922         eaddr = saddr + size - 1;
1923         if (saddr >= eaddr ||
1924             (bo && offset + size > amdgpu_bo_size(bo)))
1925                 return -EINVAL;
1926
1927         saddr /= AMDGPU_GPU_PAGE_SIZE;
1928         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1929
1930         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1931         if (tmp) {
1932                 /* bo and tmp overlap, invalid addr */
1933                 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1934                         "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1935                         tmp->start, tmp->last + 1);
1936                 return -EINVAL;
1937         }
1938
1939         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1940         if (!mapping)
1941                 return -ENOMEM;
1942
1943         mapping->start = saddr;
1944         mapping->last = eaddr;
1945         mapping->offset = offset;
1946         mapping->flags = flags;
1947
1948         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1949
1950         return 0;
1951 }
1952
1953 /**
1954  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1955  *
1956  * @adev: amdgpu_device pointer
1957  * @bo_va: bo_va to store the address
1958  * @saddr: where to map the BO
1959  * @offset: requested offset in the BO
1960  * @flags: attributes of pages (read/write/valid/etc.)
1961  *
1962  * Add a mapping of the BO at the specefied addr into the VM. Replace existing
1963  * mappings as we do so.
1964  * Returns 0 for success, error for failure.
1965  *
1966  * Object has to be reserved and unreserved outside!
1967  */
1968 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1969                              struct amdgpu_bo_va *bo_va,
1970                              uint64_t saddr, uint64_t offset,
1971                              uint64_t size, uint64_t flags)
1972 {
1973         struct amdgpu_bo_va_mapping *mapping;
1974         struct amdgpu_bo *bo = bo_va->base.bo;
1975         uint64_t eaddr;
1976         int r;
1977
1978         /* validate the parameters */
1979         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1980             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1981                 return -EINVAL;
1982
1983         /* make sure object fit at this offset */
1984         eaddr = saddr + size - 1;
1985         if (saddr >= eaddr ||
1986             (bo && offset + size > amdgpu_bo_size(bo)))
1987                 return -EINVAL;
1988
1989         /* Allocate all the needed memory */
1990         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1991         if (!mapping)
1992                 return -ENOMEM;
1993
1994         r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1995         if (r) {
1996                 kfree(mapping);
1997                 return r;
1998         }
1999
2000         saddr /= AMDGPU_GPU_PAGE_SIZE;
2001         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2002
2003         mapping->start = saddr;
2004         mapping->last = eaddr;
2005         mapping->offset = offset;
2006         mapping->flags = flags;
2007
2008         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2009
2010         return 0;
2011 }
2012
2013 /**
2014  * amdgpu_vm_bo_unmap - remove bo mapping from vm
2015  *
2016  * @adev: amdgpu_device pointer
2017  * @bo_va: bo_va to remove the address from
2018  * @saddr: where to the BO is mapped
2019  *
2020  * Remove a mapping of the BO at the specefied addr from the VM.
2021  * Returns 0 for success, error for failure.
2022  *
2023  * Object has to be reserved and unreserved outside!
2024  */
2025 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2026                        struct amdgpu_bo_va *bo_va,
2027                        uint64_t saddr)
2028 {
2029         struct amdgpu_bo_va_mapping *mapping;
2030         struct amdgpu_vm *vm = bo_va->base.vm;
2031         bool valid = true;
2032
2033         saddr /= AMDGPU_GPU_PAGE_SIZE;
2034
2035         list_for_each_entry(mapping, &bo_va->valids, list) {
2036                 if (mapping->start == saddr)
2037                         break;
2038         }
2039
2040         if (&mapping->list == &bo_va->valids) {
2041                 valid = false;
2042
2043                 list_for_each_entry(mapping, &bo_va->invalids, list) {
2044                         if (mapping->start == saddr)
2045                                 break;
2046                 }
2047
2048                 if (&mapping->list == &bo_va->invalids)
2049                         return -ENOENT;
2050         }
2051
2052         list_del(&mapping->list);
2053         amdgpu_vm_it_remove(mapping, &vm->va);
2054         mapping->bo_va = NULL;
2055         trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2056
2057         if (valid)
2058                 list_add(&mapping->list, &vm->freed);
2059         else
2060                 amdgpu_vm_free_mapping(adev, vm, mapping,
2061                                        bo_va->last_pt_update);
2062
2063         return 0;
2064 }
2065
2066 /**
2067  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2068  *
2069  * @adev: amdgpu_device pointer
2070  * @vm: VM structure to use
2071  * @saddr: start of the range
2072  * @size: size of the range
2073  *
2074  * Remove all mappings in a range, split them as appropriate.
2075  * Returns 0 for success, error for failure.
2076  */
2077 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2078                                 struct amdgpu_vm *vm,
2079                                 uint64_t saddr, uint64_t size)
2080 {
2081         struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2082         LIST_HEAD(removed);
2083         uint64_t eaddr;
2084
2085         eaddr = saddr + size - 1;
2086         saddr /= AMDGPU_GPU_PAGE_SIZE;
2087         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2088
2089         /* Allocate all the needed memory */
2090         before = kzalloc(sizeof(*before), GFP_KERNEL);
2091         if (!before)
2092                 return -ENOMEM;
2093         INIT_LIST_HEAD(&before->list);
2094
2095         after = kzalloc(sizeof(*after), GFP_KERNEL);
2096         if (!after) {
2097                 kfree(before);
2098                 return -ENOMEM;
2099         }
2100         INIT_LIST_HEAD(&after->list);
2101
2102         /* Now gather all removed mappings */
2103         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2104         while (tmp) {
2105                 /* Remember mapping split at the start */
2106                 if (tmp->start < saddr) {
2107                         before->start = tmp->start;
2108                         before->last = saddr - 1;
2109                         before->offset = tmp->offset;
2110                         before->flags = tmp->flags;
2111                         list_add(&before->list, &tmp->list);
2112                 }
2113
2114                 /* Remember mapping split at the end */
2115                 if (tmp->last > eaddr) {
2116                         after->start = eaddr + 1;
2117                         after->last = tmp->last;
2118                         after->offset = tmp->offset;
2119                         after->offset += after->start - tmp->start;
2120                         after->flags = tmp->flags;
2121                         list_add(&after->list, &tmp->list);
2122                 }
2123
2124                 list_del(&tmp->list);
2125                 list_add(&tmp->list, &removed);
2126
2127                 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2128         }
2129
2130         /* And free them up */
2131         list_for_each_entry_safe(tmp, next, &removed, list) {
2132                 amdgpu_vm_it_remove(tmp, &vm->va);
2133                 list_del(&tmp->list);
2134
2135                 if (tmp->start < saddr)
2136                     tmp->start = saddr;
2137                 if (tmp->last > eaddr)
2138                     tmp->last = eaddr;
2139
2140                 tmp->bo_va = NULL;
2141                 list_add(&tmp->list, &vm->freed);
2142                 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2143         }
2144
2145         /* Insert partial mapping before the range */
2146         if (!list_empty(&before->list)) {
2147                 amdgpu_vm_it_insert(before, &vm->va);
2148                 if (before->flags & AMDGPU_PTE_PRT)
2149                         amdgpu_vm_prt_get(adev);
2150         } else {
2151                 kfree(before);
2152         }
2153
2154         /* Insert partial mapping after the range */
2155         if (!list_empty(&after->list)) {
2156                 amdgpu_vm_it_insert(after, &vm->va);
2157                 if (after->flags & AMDGPU_PTE_PRT)
2158                         amdgpu_vm_prt_get(adev);
2159         } else {
2160                 kfree(after);
2161         }
2162
2163         return 0;
2164 }
2165
2166 /**
2167  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2168  *
2169  * @vm: the requested VM
2170  *
2171  * Find a mapping by it's address.
2172  */
2173 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2174                                                          uint64_t addr)
2175 {
2176         return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2177 }
2178
2179 /**
2180  * amdgpu_vm_bo_rmv - remove a bo to a specific vm
2181  *
2182  * @adev: amdgpu_device pointer
2183  * @bo_va: requested bo_va
2184  *
2185  * Remove @bo_va->bo from the requested vm.
2186  *
2187  * Object have to be reserved!
2188  */
2189 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2190                       struct amdgpu_bo_va *bo_va)
2191 {
2192         struct amdgpu_bo_va_mapping *mapping, *next;
2193         struct amdgpu_vm *vm = bo_va->base.vm;
2194
2195         list_del(&bo_va->base.bo_list);
2196
2197         spin_lock(&vm->moved_lock);
2198         list_del(&bo_va->base.vm_status);
2199         spin_unlock(&vm->moved_lock);
2200
2201         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2202                 list_del(&mapping->list);
2203                 amdgpu_vm_it_remove(mapping, &vm->va);
2204                 mapping->bo_va = NULL;
2205                 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2206                 list_add(&mapping->list, &vm->freed);
2207         }
2208         list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2209                 list_del(&mapping->list);
2210                 amdgpu_vm_it_remove(mapping, &vm->va);
2211                 amdgpu_vm_free_mapping(adev, vm, mapping,
2212                                        bo_va->last_pt_update);
2213         }
2214
2215         dma_fence_put(bo_va->last_pt_update);
2216         kfree(bo_va);
2217 }
2218
2219 /**
2220  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2221  *
2222  * @adev: amdgpu_device pointer
2223  * @vm: requested vm
2224  * @bo: amdgpu buffer object
2225  *
2226  * Mark @bo as invalid.
2227  */
2228 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2229                              struct amdgpu_bo *bo, bool evicted)
2230 {
2231         struct amdgpu_vm_bo_base *bo_base;
2232
2233         /* shadow bo doesn't have bo base, its validation needs its parent */
2234         if (bo->parent && bo->parent->shadow == bo)
2235                 bo = bo->parent;
2236
2237         list_for_each_entry(bo_base, &bo->va, bo_list) {
2238                 struct amdgpu_vm *vm = bo_base->vm;
2239
2240                 bo_base->moved = true;
2241                 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2242                         if (bo->tbo.type == ttm_bo_type_kernel)
2243                                 list_move(&bo_base->vm_status, &vm->evicted);
2244                         else
2245                                 list_move_tail(&bo_base->vm_status,
2246                                                &vm->evicted);
2247                         continue;
2248                 }
2249
2250                 if (bo->tbo.type == ttm_bo_type_kernel) {
2251                         if (list_empty(&bo_base->vm_status))
2252                                 list_add(&bo_base->vm_status, &vm->relocated);
2253                         continue;
2254                 }
2255
2256                 spin_lock(&bo_base->vm->moved_lock);
2257                 if (list_empty(&bo_base->vm_status))
2258                         list_add(&bo_base->vm_status, &vm->moved);
2259                 spin_unlock(&bo_base->vm->moved_lock);
2260         }
2261 }
2262
2263 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2264 {
2265         /* Total bits covered by PD + PTs */
2266         unsigned bits = ilog2(vm_size) + 18;
2267
2268         /* Make sure the PD is 4K in size up to 8GB address space.
2269            Above that split equal between PD and PTs */
2270         if (vm_size <= 8)
2271                 return (bits - 9);
2272         else
2273                 return ((bits + 3) / 2);
2274 }
2275
2276 /**
2277  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2278  *
2279  * @adev: amdgpu_device pointer
2280  * @vm_size: the default vm size if it's set auto
2281  */
2282 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2283                            uint32_t fragment_size_default, unsigned max_level,
2284                            unsigned max_bits)
2285 {
2286         uint64_t tmp;
2287
2288         /* adjust vm size first */
2289         if (amdgpu_vm_size != -1) {
2290                 unsigned max_size = 1 << (max_bits - 30);
2291
2292                 vm_size = amdgpu_vm_size;
2293                 if (vm_size > max_size) {
2294                         dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2295                                  amdgpu_vm_size, max_size);
2296                         vm_size = max_size;
2297                 }
2298         }
2299
2300         adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2301
2302         tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2303         if (amdgpu_vm_block_size != -1)
2304                 tmp >>= amdgpu_vm_block_size - 9;
2305         tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2306         adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2307         switch (adev->vm_manager.num_level) {
2308         case 3:
2309                 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2310                 break;
2311         case 2:
2312                 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2313                 break;
2314         case 1:
2315                 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2316                 break;
2317         default:
2318                 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2319         }
2320         /* block size depends on vm size and hw setup*/
2321         if (amdgpu_vm_block_size != -1)
2322                 adev->vm_manager.block_size =
2323                         min((unsigned)amdgpu_vm_block_size, max_bits
2324                             - AMDGPU_GPU_PAGE_SHIFT
2325                             - 9 * adev->vm_manager.num_level);
2326         else if (adev->vm_manager.num_level > 1)
2327                 adev->vm_manager.block_size = 9;
2328         else
2329                 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2330
2331         if (amdgpu_vm_fragment_size == -1)
2332                 adev->vm_manager.fragment_size = fragment_size_default;
2333         else
2334                 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2335
2336         DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2337                  vm_size, adev->vm_manager.num_level + 1,
2338                  adev->vm_manager.block_size,
2339                  adev->vm_manager.fragment_size);
2340 }
2341
2342 /**
2343  * amdgpu_vm_init - initialize a vm instance
2344  *
2345  * @adev: amdgpu_device pointer
2346  * @vm: requested vm
2347  * @vm_context: Indicates if it GFX or Compute context
2348  *
2349  * Init @vm fields.
2350  */
2351 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2352                    int vm_context, unsigned int pasid)
2353 {
2354         struct amdgpu_bo_param bp;
2355         struct amdgpu_bo *root;
2356         const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2357                 AMDGPU_VM_PTE_COUNT(adev) * 8);
2358         unsigned ring_instance;
2359         struct amdgpu_ring *ring;
2360         struct drm_sched_rq *rq;
2361         unsigned long size;
2362         uint64_t flags;
2363         int r, i;
2364
2365         vm->va = RB_ROOT_CACHED;
2366         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2367                 vm->reserved_vmid[i] = NULL;
2368         INIT_LIST_HEAD(&vm->evicted);
2369         INIT_LIST_HEAD(&vm->relocated);
2370         spin_lock_init(&vm->moved_lock);
2371         INIT_LIST_HEAD(&vm->moved);
2372         INIT_LIST_HEAD(&vm->freed);
2373
2374         /* create scheduler entity for page table updates */
2375
2376         ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2377         ring_instance %= adev->vm_manager.vm_pte_num_rings;
2378         ring = adev->vm_manager.vm_pte_rings[ring_instance];
2379         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2380         r = drm_sched_entity_init(&ring->sched, &vm->entity,
2381                                   rq, NULL);
2382         if (r)
2383                 return r;
2384
2385         vm->pte_support_ats = false;
2386
2387         if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2388                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2389                                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2390
2391                 if (adev->asic_type == CHIP_RAVEN)
2392                         vm->pte_support_ats = true;
2393         } else {
2394                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2395                                                 AMDGPU_VM_USE_CPU_FOR_GFX);
2396         }
2397         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2398                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2399         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2400                   "CPU update of VM recommended only for large BAR system\n");
2401         vm->last_update = NULL;
2402
2403         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2404         if (vm->use_cpu_for_update)
2405                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2406         else
2407                 flags |= AMDGPU_GEM_CREATE_SHADOW;
2408
2409         size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2410         memset(&bp, 0, sizeof(bp));
2411         bp.size = size;
2412         bp.byte_align = align;
2413         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2414         bp.flags = flags;
2415         bp.type = ttm_bo_type_kernel;
2416         bp.resv = NULL;
2417         r = amdgpu_bo_create(adev, &bp, &root);
2418         if (r)
2419                 goto error_free_sched_entity;
2420
2421         r = amdgpu_bo_reserve(root, true);
2422         if (r)
2423                 goto error_free_root;
2424
2425         r = amdgpu_vm_clear_bo(adev, vm, root,
2426                                adev->vm_manager.root_level,
2427                                vm->pte_support_ats);
2428         if (r)
2429                 goto error_unreserve;
2430
2431         amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2432         amdgpu_bo_unreserve(vm->root.base.bo);
2433
2434         if (pasid) {
2435                 unsigned long flags;
2436
2437                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2438                 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2439                               GFP_ATOMIC);
2440                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2441                 if (r < 0)
2442                         goto error_free_root;
2443
2444                 vm->pasid = pasid;
2445         }
2446
2447         INIT_KFIFO(vm->faults);
2448         vm->fault_credit = 16;
2449
2450         return 0;
2451
2452 error_unreserve:
2453         amdgpu_bo_unreserve(vm->root.base.bo);
2454
2455 error_free_root:
2456         amdgpu_bo_unref(&vm->root.base.bo->shadow);
2457         amdgpu_bo_unref(&vm->root.base.bo);
2458         vm->root.base.bo = NULL;
2459
2460 error_free_sched_entity:
2461         drm_sched_entity_fini(&ring->sched, &vm->entity);
2462
2463         return r;
2464 }
2465
2466 /**
2467  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2468  *
2469  * This only works on GFX VMs that don't have any BOs added and no
2470  * page tables allocated yet.
2471  *
2472  * Changes the following VM parameters:
2473  * - use_cpu_for_update
2474  * - pte_supports_ats
2475  * - pasid (old PASID is released, because compute manages its own PASIDs)
2476  *
2477  * Reinitializes the page directory to reflect the changed ATS
2478  * setting. May leave behind an unused shadow BO for the page
2479  * directory when switching from SDMA updates to CPU updates.
2480  *
2481  * Returns 0 for success, -errno for errors.
2482  */
2483 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2484 {
2485         bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2486         int r;
2487
2488         r = amdgpu_bo_reserve(vm->root.base.bo, true);
2489         if (r)
2490                 return r;
2491
2492         /* Sanity checks */
2493         if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2494                 r = -EINVAL;
2495                 goto error;
2496         }
2497
2498         /* Check if PD needs to be reinitialized and do it before
2499          * changing any other state, in case it fails.
2500          */
2501         if (pte_support_ats != vm->pte_support_ats) {
2502                 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2503                                adev->vm_manager.root_level,
2504                                pte_support_ats);
2505                 if (r)
2506                         goto error;
2507         }
2508
2509         /* Update VM state */
2510         vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2511                                     AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2512         vm->pte_support_ats = pte_support_ats;
2513         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2514                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2515         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2516                   "CPU update of VM recommended only for large BAR system\n");
2517
2518         if (vm->pasid) {
2519                 unsigned long flags;
2520
2521                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2522                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2523                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2524
2525                 vm->pasid = 0;
2526         }
2527
2528 error:
2529         amdgpu_bo_unreserve(vm->root.base.bo);
2530         return r;
2531 }
2532
2533 /**
2534  * amdgpu_vm_free_levels - free PD/PT levels
2535  *
2536  * @adev: amdgpu device structure
2537  * @parent: PD/PT starting level to free
2538  * @level: level of parent structure
2539  *
2540  * Free the page directory or page table level and all sub levels.
2541  */
2542 static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2543                                   struct amdgpu_vm_pt *parent,
2544                                   unsigned level)
2545 {
2546         unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
2547
2548         if (parent->base.bo) {
2549                 list_del(&parent->base.bo_list);
2550                 list_del(&parent->base.vm_status);
2551                 amdgpu_bo_unref(&parent->base.bo->shadow);
2552                 amdgpu_bo_unref(&parent->base.bo);
2553         }
2554
2555         if (parent->entries)
2556                 for (i = 0; i < num_entries; i++)
2557                         amdgpu_vm_free_levels(adev, &parent->entries[i],
2558                                               level + 1);
2559
2560         kvfree(parent->entries);
2561 }
2562
2563 /**
2564  * amdgpu_vm_fini - tear down a vm instance
2565  *
2566  * @adev: amdgpu_device pointer
2567  * @vm: requested vm
2568  *
2569  * Tear down @vm.
2570  * Unbind the VM and remove all bos from the vm bo list
2571  */
2572 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2573 {
2574         struct amdgpu_bo_va_mapping *mapping, *tmp;
2575         bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2576         struct amdgpu_bo *root;
2577         u64 fault;
2578         int i, r;
2579
2580         amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2581
2582         /* Clear pending page faults from IH when the VM is destroyed */
2583         while (kfifo_get(&vm->faults, &fault))
2584                 amdgpu_ih_clear_fault(adev, fault);
2585
2586         if (vm->pasid) {
2587                 unsigned long flags;
2588
2589                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2590                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2591                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2592         }
2593
2594         drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2595
2596         if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2597                 dev_err(adev->dev, "still active bo inside vm\n");
2598         }
2599         rbtree_postorder_for_each_entry_safe(mapping, tmp,
2600                                              &vm->va.rb_root, rb) {
2601                 list_del(&mapping->list);
2602                 amdgpu_vm_it_remove(mapping, &vm->va);
2603                 kfree(mapping);
2604         }
2605         list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2606                 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2607                         amdgpu_vm_prt_fini(adev, vm);
2608                         prt_fini_needed = false;
2609                 }
2610
2611                 list_del(&mapping->list);
2612                 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2613         }
2614
2615         root = amdgpu_bo_ref(vm->root.base.bo);
2616         r = amdgpu_bo_reserve(root, true);
2617         if (r) {
2618                 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2619         } else {
2620                 amdgpu_vm_free_levels(adev, &vm->root,
2621                                       adev->vm_manager.root_level);
2622                 amdgpu_bo_unreserve(root);
2623         }
2624         amdgpu_bo_unref(&root);
2625         dma_fence_put(vm->last_update);
2626         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2627                 amdgpu_vmid_free_reserved(adev, vm, i);
2628 }
2629
2630 /**
2631  * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2632  *
2633  * @adev: amdgpu_device pointer
2634  * @pasid: PASID do identify the VM
2635  *
2636  * This function is expected to be called in interrupt context. Returns
2637  * true if there was fault credit, false otherwise
2638  */
2639 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2640                                   unsigned int pasid)
2641 {
2642         struct amdgpu_vm *vm;
2643
2644         spin_lock(&adev->vm_manager.pasid_lock);
2645         vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2646         if (!vm) {
2647                 /* VM not found, can't track fault credit */
2648                 spin_unlock(&adev->vm_manager.pasid_lock);
2649                 return true;
2650         }
2651
2652         /* No lock needed. only accessed by IRQ handler */
2653         if (!vm->fault_credit) {
2654                 /* Too many faults in this VM */
2655                 spin_unlock(&adev->vm_manager.pasid_lock);
2656                 return false;
2657         }
2658
2659         vm->fault_credit--;
2660         spin_unlock(&adev->vm_manager.pasid_lock);
2661         return true;
2662 }
2663
2664 /**
2665  * amdgpu_vm_manager_init - init the VM manager
2666  *
2667  * @adev: amdgpu_device pointer
2668  *
2669  * Initialize the VM manager structures
2670  */
2671 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2672 {
2673         unsigned i;
2674
2675         amdgpu_vmid_mgr_init(adev);
2676
2677         adev->vm_manager.fence_context =
2678                 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2679         for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2680                 adev->vm_manager.seqno[i] = 0;
2681
2682         atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2683         spin_lock_init(&adev->vm_manager.prt_lock);
2684         atomic_set(&adev->vm_manager.num_prt_users, 0);
2685
2686         /* If not overridden by the user, by default, only in large BAR systems
2687          * Compute VM tables will be updated by CPU
2688          */
2689 #ifdef CONFIG_X86_64
2690         if (amdgpu_vm_update_mode == -1) {
2691                 if (amdgpu_vm_is_large_bar(adev))
2692                         adev->vm_manager.vm_update_mode =
2693                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2694                 else
2695                         adev->vm_manager.vm_update_mode = 0;
2696         } else
2697                 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2698 #else
2699         adev->vm_manager.vm_update_mode = 0;
2700 #endif
2701
2702         idr_init(&adev->vm_manager.pasid_idr);
2703         spin_lock_init(&adev->vm_manager.pasid_lock);
2704 }
2705
2706 /**
2707  * amdgpu_vm_manager_fini - cleanup VM manager
2708  *
2709  * @adev: amdgpu_device pointer
2710  *
2711  * Cleanup the VM manager and free resources.
2712  */
2713 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2714 {
2715         WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2716         idr_destroy(&adev->vm_manager.pasid_idr);
2717
2718         amdgpu_vmid_mgr_fini(adev);
2719 }
2720
2721 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2722 {
2723         union drm_amdgpu_vm *args = data;
2724         struct amdgpu_device *adev = dev->dev_private;
2725         struct amdgpu_fpriv *fpriv = filp->driver_priv;
2726         int r;
2727
2728         switch (args->in.op) {
2729         case AMDGPU_VM_OP_RESERVE_VMID:
2730                 /* current, we only have requirement to reserve vmid from gfxhub */
2731                 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2732                 if (r)
2733                         return r;
2734                 break;
2735         case AMDGPU_VM_OP_UNRESERVE_VMID:
2736                 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2737                 break;
2738         default:
2739                 return -EINVAL;
2740         }
2741
2742         return 0;
2743 }