drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

   1 /*
   2  * Copyright 2008 Advanced Micro Devices, Inc.
   3  * Copyright 2008 Red Hat Inc.
   4  * Copyright 2009 Jerome Glisse.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors: Dave Airlie
  25  *          Alex Deucher
  26  *          Jerome Glisse
  27  */
  28 #include <linux/dma-fence-array.h>
  29 #include <linux/interval_tree_generic.h>
  30 #include <linux/idr.h>
  31 #include <drm/drmP.h>
  32 #include <drm/amdgpu_drm.h>
  33 #include "amdgpu.h"
  34 #include "amdgpu_trace.h"
  35 #include "amdgpu_amdkfd.h"
  36
  37 /*
  38  * GPUVM
  39  * GPUVM is similar to the legacy gart on older asics, however
  40  * rather than there being a single global gart table
  41  * for the entire GPU, there are multiple VM page tables active
  42  * at any given time.  The VM page tables can contain a mix
  43  * vram pages and system memory pages and system memory pages
  44  * can be mapped as snooped (cached system pages) or unsnooped
  45  * (uncached system pages).
  46  * Each VM has an ID associated with it and there is a page table
  47  * associated with each VMID.  When execting a command buffer,
  48  * the kernel tells the the ring what VMID to use for that command
  49  * buffer.  VMIDs are allocated dynamically as commands are submitted.
  50  * The userspace drivers maintain their own address space and the kernel
  51  * sets up their pages tables accordingly when they submit their
  52  * command buffers and a VMID is assigned.
  53  * Cayman/Trinity support up to 8 active VMs at any given time;
  54  * SI supports 16.
  55  */
  56
  57 #define START(node) ((node)->start)
  58 #define LAST(node) ((node)->last)
  59
  60 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
  61                      START, LAST, static, amdgpu_vm_it)
  62
  63 #undef START
  64 #undef LAST
  65
  66 /* Local structure. Encapsulate some VM table update parameters to reduce
  67  * the number of function parameters
  68  */
  69 struct amdgpu_pte_update_params {
  70         /* amdgpu device we do this update for */
  71         struct amdgpu_device *adev;
  72         /* optional amdgpu_vm we do this update for */
  73         struct amdgpu_vm *vm;
  74         /* address where to copy page table entries from */
  75         uint64_t src;
  76         /* indirect buffer to fill with commands */
  77         struct amdgpu_ib *ib;
  78         /* Function which actually does the update */
  79         void (*func)(struct amdgpu_pte_update_params *params,
  80                      struct amdgpu_bo *bo, uint64_t pe,
  81                      uint64_t addr, unsigned count, uint32_t incr,
  82                      uint64_t flags);
  83         /* The next two are used during VM update by CPU
  84          *  DMA addresses to use for mapping
  85          *  Kernel pointer of PD/PT BO that needs to be updated
  86          */
  87         dma_addr_t *pages_addr;
  88         void *kptr;
  89 };
  90
  91 /* Helper to disable partial resident texture feature from a fence callback */
  92 struct amdgpu_prt_cb {
  93         struct amdgpu_device *adev;
  94         struct dma_fence_cb cb;
  95 };
  96
  97 static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
  98                                    struct amdgpu_vm *vm,
  99                                    struct amdgpu_bo *bo)
 100 {
 101         base->vm = vm;
 102         base->bo = bo;
 103         INIT_LIST_HEAD(&base->bo_list);
 104         INIT_LIST_HEAD(&base->vm_status);
 105
 106         if (!bo)
 107                 return;
 108         list_add_tail(&base->bo_list, &bo->va);
 109
 110         if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
 111                 return;
 112
 113         if (bo->preferred_domains &
 114             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
 115                 return;
 116
 117         /*
 118          * we checked all the prerequisites, but it looks like this per vm bo
 119          * is currently evicted. add the bo to the evicted list to make sure it
 120          * is validated on next vm use to avoid fault.
 121          * */
 122         list_move_tail(&base->vm_status, &vm->evicted);
 123 }
 124
 125 /**
 126  * amdgpu_vm_level_shift - return the addr shift for each level
 127  *
 128  * @adev: amdgpu_device pointer
 129  *
 130  * Returns the number of bits the pfn needs to be right shifted for a level.
 131  */
 132 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 133                                       unsigned level)
 134 {
 135         unsigned shift = 0xff;
 136
 137         switch (level) {
 138         case AMDGPU_VM_PDB2:
 139         case AMDGPU_VM_PDB1:
 140         case AMDGPU_VM_PDB0:
 141                 shift = 9 * (AMDGPU_VM_PDB0 - level) +
 142                         adev->vm_manager.block_size;
 143                 break;
 144         case AMDGPU_VM_PTB:
 145                 shift = 0;
 146                 break;
 147         default:
 148                 dev_err(adev->dev, "the level%d isn't supported.\n", level);
 149         }
 150
 151         return shift;
 152 }
 153
 154 /**
 155  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 156  *
 157  * @adev: amdgpu_device pointer
 158  *
 159  * Calculate the number of entries in a page directory or page table.
 160  */
 161 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 162                                       unsigned level)
 163 {
 164         unsigned shift = amdgpu_vm_level_shift(adev,
 165                                                adev->vm_manager.root_level);
 166
 167         if (level == adev->vm_manager.root_level)
 168                 /* For the root directory */
 169                 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
 170         else if (level != AMDGPU_VM_PTB)
 171                 /* Everything in between */
 172                 return 512;
 173         else
 174                 /* For the page tables on the leaves */
 175                 return AMDGPU_VM_PTE_COUNT(adev);
 176 }
 177
 178 /**
 179  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 180  *
 181  * @adev: amdgpu_device pointer
 182  *
 183  * Calculate the size of the BO for a page directory or page table in bytes.
 184  */
 185 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
 186 {
 187         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
 188 }
 189
 190 /**
 191  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 192  *
 193  * @vm: vm providing the BOs
 194  * @validated: head of validation list
 195  * @entry: entry to add
 196  *
 197  * Add the page directory to the list of BOs to
 198  * validate for command submission.
 199  */
 200 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 201                          struct list_head *validated,
 202                          struct amdgpu_bo_list_entry *entry)
 203 {
 204         entry->robj = vm->root.base.bo;
 205         entry->priority = 0;
 206         entry->tv.bo = &entry->robj->tbo;
 207         entry->tv.shared = true;
 208         entry->user_pages = NULL;
 209         list_add(&entry->tv.head, validated);
 210 }
 211
 212 /**
 213  * amdgpu_vm_validate_pt_bos - validate the page table BOs
 214  *
 215  * @adev: amdgpu device pointer
 216  * @vm: vm providing the BOs
 217  * @validate: callback to do the validation
 218  * @param: parameter for the validation callback
 219  *
 220  * Validate the page table BOs on command submission if neccessary.
 221  */
 222 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 223                               int (*validate)(void *p, struct amdgpu_bo *bo),
 224                               void *param)
 225 {
 226         struct ttm_bo_global *glob = adev->mman.bdev.glob;
 227         struct amdgpu_vm_bo_base *bo_base, *tmp;
 228         int r = 0;
 229
 230         list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 231                 struct amdgpu_bo *bo = bo_base->bo;
 232
 233                 if (bo->parent) {
 234                         r = validate(param, bo);
 235                         if (r)
 236                                 break;
 237
 238                         spin_lock(&glob->lru_lock);
 239                         ttm_bo_move_to_lru_tail(&bo->tbo);
 240                         if (bo->shadow)
 241                                 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
 242                         spin_unlock(&glob->lru_lock);
 243                 }
 244
 245                 if (bo->tbo.type == ttm_bo_type_kernel &&
 246                     vm->use_cpu_for_update) {
 247                         r = amdgpu_bo_kmap(bo, NULL);
 248                         if (r)
 249                                 break;
 250                 }
 251
 252                 if (bo->tbo.type != ttm_bo_type_kernel) {
 253                         spin_lock(&vm->moved_lock);
 254                         list_move(&bo_base->vm_status, &vm->moved);
 255                         spin_unlock(&vm->moved_lock);
 256                 } else {
 257                         list_move(&bo_base->vm_status, &vm->relocated);
 258                 }
 259         }
 260
 261         return r;
 262 }
 263
 264 /**
 265  * amdgpu_vm_ready - check VM is ready for updates
 266  *
 267  * @vm: VM to check
 268  *
 269  * Check if all VM PDs/PTs are ready for updates
 270  */
 271 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 272 {
 273         return list_empty(&vm->evicted);
 274 }
 275
 276 /**
 277  * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 278  *
 279  * @adev: amdgpu_device pointer
 280  * @bo: BO to clear
 281  * @level: level this BO is at
 282  *
 283  * Root PD needs to be reserved when calling this.
 284  */
 285 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 286                               struct amdgpu_vm *vm, struct amdgpu_bo *bo,
 287                               unsigned level, bool pte_support_ats)
 288 {
 289         struct ttm_operation_ctx ctx = { true, false };
 290         struct dma_fence *fence = NULL;
 291         unsigned entries, ats_entries;
 292         struct amdgpu_ring *ring;
 293         struct amdgpu_job *job;
 294         uint64_t addr;
 295         int r;
 296
 297         addr = amdgpu_bo_gpu_offset(bo);
 298         entries = amdgpu_bo_size(bo) / 8;
 299
 300         if (pte_support_ats) {
 301                 if (level == adev->vm_manager.root_level) {
 302                         ats_entries = amdgpu_vm_level_shift(adev, level);
 303                         ats_entries += AMDGPU_GPU_PAGE_SHIFT;
 304                         ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
 305                         ats_entries = min(ats_entries, entries);
 306                         entries -= ats_entries;
 307                 } else {
 308                         ats_entries = entries;
 309                         entries = 0;
 310                 }
 311         } else {
 312                 ats_entries = 0;
 313         }
 314
 315         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 316
 317         r = reservation_object_reserve_shared(bo->tbo.resv);
 318         if (r)
 319                 return r;
 320
 321         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 322         if (r)
 323                 goto error;
 324
 325         r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 326         if (r)
 327                 goto error;
 328
 329         if (ats_entries) {
 330                 uint64_t ats_value;
 331
 332                 ats_value = AMDGPU_PTE_DEFAULT_ATC;
 333                 if (level != AMDGPU_VM_PTB)
 334                         ats_value |= AMDGPU_PDE_PTE;
 335
 336                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 337                                       ats_entries, 0, ats_value);
 338                 addr += ats_entries * 8;
 339         }
 340
 341         if (entries)
 342                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
 343                                       entries, 0, 0);
 344
 345         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 346
 347         WARN_ON(job->ibs[0].length_dw > 64);
 348         r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
 349                              AMDGPU_FENCE_OWNER_UNDEFINED, false);
 350         if (r)
 351                 goto error_free;
 352
 353         r = amdgpu_job_submit(job, ring, &vm->entity,
 354                               AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 355         if (r)
 356                 goto error_free;
 357
 358         amdgpu_bo_fence(bo, fence, true);
 359         dma_fence_put(fence);
 360
 361         if (bo->shadow)
 362                 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
 363                                           level, pte_support_ats);
 364
 365         return 0;
 366
 367 error_free:
 368         amdgpu_job_free(job);
 369
 370 error:
 371         return r;
 372 }
 373
 374 /**
 375  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 376  *
 377  * @adev: amdgpu_device pointer
 378  * @vm: requested vm
 379  * @saddr: start of the address range
 380  * @eaddr: end of the address range
 381  *
 382  * Make sure the page directories and page tables are allocated
 383  */
 384 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 385                                   struct amdgpu_vm *vm,
 386                                   struct amdgpu_vm_pt *parent,
 387                                   uint64_t saddr, uint64_t eaddr,
 388                                   unsigned level, bool ats)
 389 {
 390         unsigned shift = amdgpu_vm_level_shift(adev, level);
 391         unsigned pt_idx, from, to;
 392         u64 flags;
 393         int r;
 394
 395         if (!parent->entries) {
 396                 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 397
 398                 parent->entries = kvmalloc_array(num_entries,
 399                                                    sizeof(struct amdgpu_vm_pt),
 400                                                    GFP_KERNEL | __GFP_ZERO);
 401                 if (!parent->entries)
 402                         return -ENOMEM;
 403                 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
 404         }
 405
 406         from = saddr >> shift;
 407         to = eaddr >> shift;
 408         if (from >= amdgpu_vm_num_entries(adev, level) ||
 409             to >= amdgpu_vm_num_entries(adev, level))
 410                 return -EINVAL;
 411
 412         ++level;
 413         saddr = saddr & ((1 << shift) - 1);
 414         eaddr = eaddr & ((1 << shift) - 1);
 415
 416         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 417         if (vm->use_cpu_for_update)
 418                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 419         else
 420                 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 421                                 AMDGPU_GEM_CREATE_SHADOW);
 422
 423         /* walk over the address space and allocate the page tables */
 424         for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 425                 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 426                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 427                 struct amdgpu_bo *pt;
 428
 429                 if (!entry->base.bo) {
 430                         struct amdgpu_bo_param bp;
 431
 432                         memset(&bp, 0, sizeof(bp));
 433                         bp.size = amdgpu_vm_bo_size(adev, level);
 434                         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
 435                         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 436                         bp.flags = flags;
 437                         bp.type = ttm_bo_type_kernel;
 438                         bp.resv = resv;
 439                         r = amdgpu_bo_create(adev, &bp, &pt);
 440                         if (r)
 441                                 return r;
 442
 443                         r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
 444                         if (r) {
 445                                 amdgpu_bo_unref(&pt->shadow);
 446                                 amdgpu_bo_unref(&pt);
 447                                 return r;
 448                         }
 449
 450                         if (vm->use_cpu_for_update) {
 451                                 r = amdgpu_bo_kmap(pt, NULL);
 452                                 if (r) {
 453                                         amdgpu_bo_unref(&pt->shadow);
 454                                         amdgpu_bo_unref(&pt);
 455                                         return r;
 456                                 }
 457                         }
 458
 459                         /* Keep a reference to the root directory to avoid
 460                         * freeing them up in the wrong order.
 461                         */
 462                         pt->parent = amdgpu_bo_ref(parent->base.bo);
 463
 464                         amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 465                         list_move(&entry->base.vm_status, &vm->relocated);
 466                 }
 467
 468                 if (level < AMDGPU_VM_PTB) {
 469                         uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 470                         uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 471                                 ((1 << shift) - 1);
 472                         r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
 473                                                    sub_eaddr, level, ats);
 474                         if (r)
 475                                 return r;
 476                 }
 477         }
 478
 479         return 0;
 480 }
 481
 482 /**
 483  * amdgpu_vm_alloc_pts - Allocate page tables.
 484  *
 485  * @adev: amdgpu_device pointer
 486  * @vm: VM to allocate page tables for
 487  * @saddr: Start address which needs to be allocated
 488  * @size: Size from start address we need.
 489  *
 490  * Make sure the page tables are allocated.
 491  */
 492 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 493                         struct amdgpu_vm *vm,
 494                         uint64_t saddr, uint64_t size)
 495 {
 496         uint64_t eaddr;
 497         bool ats = false;
 498
 499         /* validate the parameters */
 500         if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
 501                 return -EINVAL;
 502
 503         eaddr = saddr + size - 1;
 504
 505         if (vm->pte_support_ats)
 506                 ats = saddr < AMDGPU_VA_HOLE_START;
 507
 508         saddr /= AMDGPU_GPU_PAGE_SIZE;
 509         eaddr /= AMDGPU_GPU_PAGE_SIZE;
 510
 511         if (eaddr >= adev->vm_manager.max_pfn) {
 512                 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
 513                         eaddr, adev->vm_manager.max_pfn);
 514                 return -EINVAL;
 515         }
 516
 517         return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
 518                                       adev->vm_manager.root_level, ats);
 519 }
 520
 521 /**
 522  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 523  *
 524  * @adev: amdgpu_device pointer
 525  */
 526 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 527 {
 528         const struct amdgpu_ip_block *ip_block;
 529         bool has_compute_vm_bug;
 530         struct amdgpu_ring *ring;
 531         int i;
 532
 533         has_compute_vm_bug = false;
 534
 535         ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 536         if (ip_block) {
 537                 /* Compute has a VM bug for GFX version < 7.
 538                    Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
 539                 if (ip_block->version->major <= 7)
 540                         has_compute_vm_bug = true;
 541                 else if (ip_block->version->major == 8)
 542                         if (adev->gfx.mec_fw_version < 673)
 543                                 has_compute_vm_bug = true;
 544         }
 545
 546         for (i = 0; i < adev->num_rings; i++) {
 547                 ring = adev->rings[i];
 548                 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
 549                         /* only compute rings */
 550                         ring->has_compute_vm_bug = has_compute_vm_bug;
 551                 else
 552                         ring->has_compute_vm_bug = false;
 553         }
 554 }
 555
 556 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 557                                   struct amdgpu_job *job)
 558 {
 559         struct amdgpu_device *adev = ring->adev;
 560         unsigned vmhub = ring->funcs->vmhub;
 561         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 562         struct amdgpu_vmid *id;
 563         bool gds_switch_needed;
 564         bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 565
 566         if (job->vmid == 0)
 567                 return false;
 568         id = &id_mgr->ids[job->vmid];
 569         gds_switch_needed = ring->funcs->emit_gds_switch && (
 570                 id->gds_base != job->gds_base ||
 571                 id->gds_size != job->gds_size ||
 572                 id->gws_base != job->gws_base ||
 573                 id->gws_size != job->gws_size ||
 574                 id->oa_base != job->oa_base ||
 575                 id->oa_size != job->oa_size);
 576
 577         if (amdgpu_vmid_had_gpu_reset(adev, id))
 578                 return true;
 579
 580         return vm_flush_needed || gds_switch_needed;
 581 }
 582
 583 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 584 {
 585         return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 586 }
 587
 588 /**
 589  * amdgpu_vm_flush - hardware flush the vm
 590  *
 591  * @ring: ring to use for flush
 592  * @vmid: vmid number to use
 593  * @pd_addr: address of the page directory
 594  *
 595  * Emit a VM flush when it is necessary.
 596  */
 597 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
 598 {
 599         struct amdgpu_device *adev = ring->adev;
 600         unsigned vmhub = ring->funcs->vmhub;
 601         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 602         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
 603         bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 604                 id->gds_base != job->gds_base ||
 605                 id->gds_size != job->gds_size ||
 606                 id->gws_base != job->gws_base ||
 607                 id->gws_size != job->gws_size ||
 608                 id->oa_base != job->oa_base ||
 609                 id->oa_size != job->oa_size);
 610         bool vm_flush_needed = job->vm_needs_flush;
 611         bool pasid_mapping_needed = id->pasid != job->pasid ||
 612                 !id->pasid_mapping ||
 613                 !dma_fence_is_signaled(id->pasid_mapping);
 614         struct dma_fence *fence = NULL;
 615         unsigned patch_offset = 0;
 616         int r;
 617
 618         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 619                 gds_switch_needed = true;
 620                 vm_flush_needed = true;
 621                 pasid_mapping_needed = true;
 622         }
 623
 624         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
 625         vm_flush_needed &= !!ring->funcs->emit_vm_flush;
 626         pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 627                 ring->funcs->emit_wreg;
 628
 629         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 630                 return 0;
 631
 632         if (ring->funcs->init_cond_exec)
 633                 patch_offset = amdgpu_ring_init_cond_exec(ring);
 634
 635         if (need_pipe_sync)
 636                 amdgpu_ring_emit_pipeline_sync(ring);
 637
 638         if (vm_flush_needed) {
 639                 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 640                 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 641         }
 642
 643         if (pasid_mapping_needed)
 644                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 645
 646         if (vm_flush_needed || pasid_mapping_needed) {
 647                 r = amdgpu_fence_emit(ring, &fence, 0);
 648                 if (r)
 649                         return r;
 650         }
 651
 652         if (vm_flush_needed) {
 653                 mutex_lock(&id_mgr->lock);
 654                 dma_fence_put(id->last_flush);
 655                 id->last_flush = dma_fence_get(fence);
 656                 id->current_gpu_reset_count =
 657                         atomic_read(&adev->gpu_reset_counter);
 658                 mutex_unlock(&id_mgr->lock);
 659         }
 660
 661         if (pasid_mapping_needed) {
 662                 id->pasid = job->pasid;
 663                 dma_fence_put(id->pasid_mapping);
 664                 id->pasid_mapping = dma_fence_get(fence);
 665         }
 666         dma_fence_put(fence);
 667
 668         if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 669                 id->gds_base = job->gds_base;
 670                 id->gds_size = job->gds_size;
 671                 id->gws_base = job->gws_base;
 672                 id->gws_size = job->gws_size;
 673                 id->oa_base = job->oa_base;
 674                 id->oa_size = job->oa_size;
 675                 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
 676                                             job->gds_size, job->gws_base,
 677                                             job->gws_size, job->oa_base,
 678                                             job->oa_size);
 679         }
 680
 681         if (ring->funcs->patch_cond_exec)
 682                 amdgpu_ring_patch_cond_exec(ring, patch_offset);
 683
 684         /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
 685         if (ring->funcs->emit_switch_buffer) {
 686                 amdgpu_ring_emit_switch_buffer(ring);
 687                 amdgpu_ring_emit_switch_buffer(ring);
 688         }
 689         return 0;
 690 }
 691
 692 /**
 693  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 694  *
 695  * @vm: requested vm
 696  * @bo: requested buffer object
 697  *
 698  * Find @bo inside the requested vm.
 699  * Search inside the @bos vm list for the requested vm
 700  * Returns the found bo_va or NULL if none is found
 701  *
 702  * Object has to be reserved!
 703  */
 704 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 705                                        struct amdgpu_bo *bo)
 706 {
 707         struct amdgpu_bo_va *bo_va;
 708
 709         list_for_each_entry(bo_va, &bo->va, base.bo_list) {
 710                 if (bo_va->base.vm == vm) {
 711                         return bo_va;
 712                 }
 713         }
 714         return NULL;
 715 }
 716
 717 /**
 718  * amdgpu_vm_do_set_ptes - helper to call the right asic function
 719  *
 720  * @params: see amdgpu_pte_update_params definition
 721  * @bo: PD/PT to update
 722  * @pe: addr of the page entry
 723  * @addr: dst addr to write into pe
 724  * @count: number of page entries to update
 725  * @incr: increase next addr by incr bytes
 726  * @flags: hw access flags
 727  *
 728  * Traces the parameters and calls the right asic functions
 729  * to setup the page table using the DMA.
 730  */
 731 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
 732                                   struct amdgpu_bo *bo,
 733                                   uint64_t pe, uint64_t addr,
 734                                   unsigned count, uint32_t incr,
 735                                   uint64_t flags)
 736 {
 737         pe += amdgpu_bo_gpu_offset(bo);
 738         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 739
 740         if (count < 3) {
 741                 amdgpu_vm_write_pte(params->adev, params->ib, pe,
 742                                     addr | flags, count, incr);
 743
 744         } else {
 745                 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
 746                                       count, incr, flags);
 747         }
 748 }
 749
 750 /**
 751  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 752  *
 753  * @params: see amdgpu_pte_update_params definition
 754  * @bo: PD/PT to update
 755  * @pe: addr of the page entry
 756  * @addr: dst addr to write into pe
 757  * @count: number of page entries to update
 758  * @incr: increase next addr by incr bytes
 759  * @flags: hw access flags
 760  *
 761  * Traces the parameters and calls the DMA function to copy the PTEs.
 762  */
 763 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 764                                    struct amdgpu_bo *bo,
 765                                    uint64_t pe, uint64_t addr,
 766                                    unsigned count, uint32_t incr,
 767                                    uint64_t flags)
 768 {
 769         uint64_t src = (params->src + (addr >> 12) * 8);
 770
 771         pe += amdgpu_bo_gpu_offset(bo);
 772         trace_amdgpu_vm_copy_ptes(pe, src, count);
 773
 774         amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
 775 }
 776
 777 /**
 778  * amdgpu_vm_map_gart - Resolve gart mapping of addr
 779  *
 780  * @pages_addr: optional DMA address to use for lookup
 781  * @addr: the unmapped addr
 782  *
 783  * Look up the physical address of the page that the pte resolves
 784  * to and return the pointer for the page table entry.
 785  */
 786 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 787 {
 788         uint64_t result;
 789
 790         /* page table offset */
 791         result = pages_addr[addr >> PAGE_SHIFT];
 792
 793         /* in case cpu page size != gpu page size*/
 794         result |= addr & (~PAGE_MASK);
 795
 796         result &= 0xFFFFFFFFFFFFF000ULL;
 797
 798         return result;
 799 }
 800
 801 /**
 802  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 803  *
 804  * @params: see amdgpu_pte_update_params definition
 805  * @bo: PD/PT to update
 806  * @pe: kmap addr of the page entry
 807  * @addr: dst addr to write into pe
 808  * @count: number of page entries to update
 809  * @incr: increase next addr by incr bytes
 810  * @flags: hw access flags
 811  *
 812  * Write count number of PT/PD entries directly.
 813  */
 814 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
 815                                    struct amdgpu_bo *bo,
 816                                    uint64_t pe, uint64_t addr,
 817                                    unsigned count, uint32_t incr,
 818                                    uint64_t flags)
 819 {
 820         unsigned int i;
 821         uint64_t value;
 822
 823         pe += (unsigned long)amdgpu_bo_kptr(bo);
 824
 825         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 826
 827         for (i = 0; i < count; i++) {
 828                 value = params->pages_addr ?
 829                         amdgpu_vm_map_gart(params->pages_addr, addr) :
 830                         addr;
 831                 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
 832                                        i, value, flags);
 833                 addr += incr;
 834         }
 835 }
 836
 837 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 838                              void *owner)
 839 {
 840         struct amdgpu_sync sync;
 841         int r;
 842
 843         amdgpu_sync_create(&sync);
 844         amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
 845         r = amdgpu_sync_wait(&sync, true);
 846         amdgpu_sync_free(&sync);
 847
 848         return r;
 849 }
 850
 851 /*
 852  * amdgpu_vm_update_pde - update a single level in the hierarchy
 853  *
 854  * @param: parameters for the update
 855  * @vm: requested vm
 856  * @parent: parent directory
 857  * @entry: entry to update
 858  *
 859  * Makes sure the requested entry in parent is up to date.
 860  */
 861 static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 862                                  struct amdgpu_vm *vm,
 863                                  struct amdgpu_vm_pt *parent,
 864                                  struct amdgpu_vm_pt *entry)
 865 {
 866         struct amdgpu_bo *bo = parent->base.bo, *pbo;
 867         uint64_t pde, pt, flags;
 868         unsigned level;
 869
 870         /* Don't update huge pages here */
 871         if (entry->huge)
 872                 return;
 873
 874         for (level = 0, pbo = bo->parent; pbo; ++level)
 875                 pbo = pbo->parent;
 876
 877         level += params->adev->vm_manager.root_level;
 878         pt = amdgpu_bo_gpu_offset(entry->base.bo);
 879         flags = AMDGPU_PTE_VALID;
 880         amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
 881         pde = (entry - parent->entries) * 8;
 882         if (bo->shadow)
 883                 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
 884         params->func(params, bo, pde, pt, 1, 0, flags);
 885 }
 886
 887 /*
 888  * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 889  *
 890  * @parent: parent PD
 891  *
 892  * Mark all PD level as invalid after an error.
 893  */
 894 static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 895                                        struct amdgpu_vm *vm,
 896                                        struct amdgpu_vm_pt *parent,
 897                                        unsigned level)
 898 {
 899         unsigned pt_idx, num_entries;
 900
 901         /*
 902          * Recurse into the subdirectories. This recursion is harmless because
 903          * we only have a maximum of 5 layers.
 904          */
 905         num_entries = amdgpu_vm_num_entries(adev, level);
 906         for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 907                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 908
 909                 if (!entry->base.bo)
 910                         continue;
 911
 912                 if (list_empty(&entry->base.vm_status))
 913                         list_add(&entry->base.vm_status, &vm->relocated);
 914                 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 915         }
 916 }
 917
 918 /*
 919  * amdgpu_vm_update_directories - make sure that all directories are valid
 920  *
 921  * @adev: amdgpu_device pointer
 922  * @vm: requested vm
 923  *
 924  * Makes sure all directories are up to date.
 925  * Returns 0 for success, error for failure.
 926  */
 927 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 928                                  struct amdgpu_vm *vm)
 929 {
 930         struct amdgpu_pte_update_params params;
 931         struct amdgpu_job *job;
 932         unsigned ndw = 0;
 933         int r = 0;
 934
 935         if (list_empty(&vm->relocated))
 936                 return 0;
 937
 938 restart:
 939         memset(&params, 0, sizeof(params));
 940         params.adev = adev;
 941
 942         if (vm->use_cpu_for_update) {
 943                 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 944                 if (unlikely(r))
 945                         return r;
 946
 947                 params.func = amdgpu_vm_cpu_set_ptes;
 948         } else {
 949                 ndw = 512 * 8;
 950                 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 951                 if (r)
 952                         return r;
 953
 954                 params.ib = &job->ibs[0];
 955                 params.func = amdgpu_vm_do_set_ptes;
 956         }
 957
 958         while (!list_empty(&vm->relocated)) {
 959                 struct amdgpu_vm_bo_base *bo_base, *parent;
 960                 struct amdgpu_vm_pt *pt, *entry;
 961                 struct amdgpu_bo *bo;
 962
 963                 bo_base = list_first_entry(&vm->relocated,
 964                                            struct amdgpu_vm_bo_base,
 965                                            vm_status);
 966                 list_del_init(&bo_base->vm_status);
 967
 968                 bo = bo_base->bo->parent;
 969                 if (!bo)
 970                         continue;
 971
 972                 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
 973                                           bo_list);
 974                 pt = container_of(parent, struct amdgpu_vm_pt, base);
 975                 entry = container_of(bo_base, struct amdgpu_vm_pt, base);
 976
 977                 amdgpu_vm_update_pde(&params, vm, pt, entry);
 978
 979                 if (!vm->use_cpu_for_update &&
 980                     (ndw - params.ib->length_dw) < 32)
 981                         break;
 982         }
 983
 984         if (vm->use_cpu_for_update) {
 985                 /* Flush HDP */
 986                 mb();
 987                 amdgpu_asic_flush_hdp(adev, NULL);
 988         } else if (params.ib->length_dw == 0) {
 989                 amdgpu_job_free(job);
 990         } else {
 991                 struct amdgpu_bo *root = vm->root.base.bo;
 992                 struct amdgpu_ring *ring;
 993                 struct dma_fence *fence;
 994
 995                 ring = container_of(vm->entity.sched, struct amdgpu_ring,
 996                                     sched);
 997
 998                 amdgpu_ring_pad_ib(ring, params.ib);
 999                 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1000                                  AMDGPU_FENCE_OWNER_VM, false);
1001                 WARN_ON(params.ib->length_dw > ndw);
1002                 r = amdgpu_job_submit(job, ring, &vm->entity,
1003                                       AMDGPU_FENCE_OWNER_VM, &fence);
1004                 if (r)
1005                         goto error;
1006
1007                 amdgpu_bo_fence(root, fence, true);
1008                 dma_fence_put(vm->last_update);
1009                 vm->last_update = fence;
1010         }
1011
1012         if (!list_empty(&vm->relocated))
1013                 goto restart;
1014
1015         return 0;
1016
1017 error:
1018         amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1019                                    adev->vm_manager.root_level);
1020         amdgpu_job_free(job);
1021         return r;
1022 }
1023
1024 /**
1025  * amdgpu_vm_find_entry - find the entry for an address
1026  *
1027  * @p: see amdgpu_pte_update_params definition
1028  * @addr: virtual address in question
1029  * @entry: resulting entry or NULL
1030  * @parent: parent entry
1031  *
1032  * Find the vm_pt entry and it's parent for the given address.
1033  */
1034 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1035                          struct amdgpu_vm_pt **entry,
1036                          struct amdgpu_vm_pt **parent)
1037 {
1038         unsigned level = p->adev->vm_manager.root_level;
1039
1040         *parent = NULL;
1041         *entry = &p->vm->root;
1042         while ((*entry)->entries) {
1043                 unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1044
1045                 *parent = *entry;
1046                 *entry = &(*entry)->entries[addr >> shift];
1047                 addr &= (1ULL << shift) - 1;
1048         }
1049
1050         if (level != AMDGPU_VM_PTB)
1051                 *entry = NULL;
1052 }
1053
1054 /**
1055  * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1056  *
1057  * @p: see amdgpu_pte_update_params definition
1058  * @entry: vm_pt entry to check
1059  * @parent: parent entry
1060  * @nptes: number of PTEs updated with this operation
1061  * @dst: destination address where the PTEs should point to
1062  * @flags: access flags fro the PTEs
1063  *
1064  * Check if we can update the PD with a huge page.
1065  */
1066 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1067                                         struct amdgpu_vm_pt *entry,
1068                                         struct amdgpu_vm_pt *parent,
1069                                         unsigned nptes, uint64_t dst,
1070                                         uint64_t flags)
1071 {
1072         uint64_t pde;
1073
1074         /* In the case of a mixed PT the PDE must point to it*/
1075         if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1076             nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1077                 /* Set the huge page flag to stop scanning at this PDE */
1078                 flags |= AMDGPU_PDE_PTE;
1079         }
1080
1081         if (!(flags & AMDGPU_PDE_PTE)) {
1082                 if (entry->huge) {
1083                         /* Add the entry to the relocated list to update it. */
1084                         entry->huge = false;
1085                         list_move(&entry->base.vm_status, &p->vm->relocated);
1086                 }
1087                 return;
1088         }
1089
1090         entry->huge = true;
1091         amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1092
1093         pde = (entry - parent->entries) * 8;
1094         if (parent->base.bo->shadow)
1095                 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1096         p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1097 }
1098
1099 /**
1100  * amdgpu_vm_update_ptes - make sure that page tables are valid
1101  *
1102  * @params: see amdgpu_pte_update_params definition
1103  * @vm: requested vm
1104  * @start: start of GPU address range
1105  * @end: end of GPU address range
1106  * @dst: destination address to map to, the next dst inside the function
1107  * @flags: mapping flags
1108  *
1109  * Update the page tables in the range @start - @end.
1110  * Returns 0 for success, -EINVAL for failure.
1111  */
1112 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1113                                   uint64_t start, uint64_t end,
1114                                   uint64_t dst, uint64_t flags)
1115 {
1116         struct amdgpu_device *adev = params->adev;
1117         const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1118
1119         uint64_t addr, pe_start;
1120         struct amdgpu_bo *pt;
1121         unsigned nptes;
1122
1123         /* walk over the address space and update the page tables */
1124         for (addr = start; addr < end; addr += nptes,
1125              dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1126                 struct amdgpu_vm_pt *entry, *parent;
1127
1128                 amdgpu_vm_get_entry(params, addr, &entry, &parent);
1129                 if (!entry)
1130                         return -ENOENT;
1131
1132                 if ((addr & ~mask) == (end & ~mask))
1133                         nptes = end - addr;
1134                 else
1135                         nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1136
1137                 amdgpu_vm_handle_huge_pages(params, entry, parent,
1138                                             nptes, dst, flags);
1139                 /* We don't need to update PTEs for huge pages */
1140                 if (entry->huge)
1141                         continue;
1142
1143                 pt = entry->base.bo;
1144                 pe_start = (addr & mask) * 8;
1145                 if (pt->shadow)
1146                         params->func(params, pt->shadow, pe_start, dst, nptes,
1147                                      AMDGPU_GPU_PAGE_SIZE, flags);
1148                 params->func(params, pt, pe_start, dst, nptes,
1149                              AMDGPU_GPU_PAGE_SIZE, flags);
1150         }
1151
1152         return 0;
1153 }
1154
1155 /*
1156  * amdgpu_vm_frag_ptes - add fragment information to PTEs
1157  *
1158  * @params: see amdgpu_pte_update_params definition
1159  * @vm: requested vm
1160  * @start: first PTE to handle
1161  * @end: last PTE to handle
1162  * @dst: addr those PTEs should point to
1163  * @flags: hw mapping flags
1164  * Returns 0 for success, -EINVAL for failure.
1165  */
1166 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
1167                                 uint64_t start, uint64_t end,
1168                                 uint64_t dst, uint64_t flags)
1169 {
1170         /**
1171          * The MC L1 TLB supports variable sized pages, based on a fragment
1172          * field in the PTE. When this field is set to a non-zero value, page
1173          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1174          * flags are considered valid for all PTEs within the fragment range
1175          * and corresponding mappings are assumed to be physically contiguous.
1176          *
1177          * The L1 TLB can store a single PTE for the whole fragment,
1178          * significantly increasing the space available for translation
1179          * caching. This leads to large improvements in throughput when the
1180          * TLB is under pressure.
1181          *
1182          * The L2 TLB distributes small and large fragments into two
1183          * asymmetric partitions. The large fragment cache is significantly
1184          * larger. Thus, we try to use large fragments wherever possible.
1185          * Userspace can support this by aligning virtual base address and
1186          * allocation size to the fragment size.
1187          */
1188         unsigned max_frag = params->adev->vm_manager.fragment_size;
1189         int r;
1190
1191         /* system pages are non continuously */
1192         if (params->src || !(flags & AMDGPU_PTE_VALID))
1193                 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1194
1195         while (start != end) {
1196                 uint64_t frag_flags, frag_end;
1197                 unsigned frag;
1198
1199                 /* This intentionally wraps around if no bit is set */
1200                 frag = min((unsigned)ffs(start) - 1,
1201                            (unsigned)fls64(end - start) - 1);
1202                 if (frag >= max_frag) {
1203                         frag_flags = AMDGPU_PTE_FRAG(max_frag);
1204                         frag_end = end & ~((1ULL << max_frag) - 1);
1205                 } else {
1206                         frag_flags = AMDGPU_PTE_FRAG(frag);
1207                         frag_end = start + (1 << frag);
1208                 }
1209
1210                 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1211                                           flags | frag_flags);
1212                 if (r)
1213                         return r;
1214
1215                 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1216                 start = frag_end;
1217         }
1218
1219         return 0;
1220 }
1221
1222 /**
1223  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1224  *
1225  * @adev: amdgpu_device pointer
1226  * @exclusive: fence we need to sync to
1227  * @pages_addr: DMA addresses to use for mapping
1228  * @vm: requested vm
1229  * @start: start of mapped range
1230  * @last: last mapped entry
1231  * @flags: flags for the entries
1232  * @addr: addr to set the area to
1233  * @fence: optional resulting fence
1234  *
1235  * Fill in the page table entries between @start and @last.
1236  * Returns 0 for success, -EINVAL for failure.
1237  */
1238 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1239                                        struct dma_fence *exclusive,
1240                                        dma_addr_t *pages_addr,
1241                                        struct amdgpu_vm *vm,
1242                                        uint64_t start, uint64_t last,
1243                                        uint64_t flags, uint64_t addr,
1244                                        struct dma_fence **fence)
1245 {
1246         struct amdgpu_ring *ring;
1247         void *owner = AMDGPU_FENCE_OWNER_VM;
1248         unsigned nptes, ncmds, ndw;
1249         struct amdgpu_job *job;
1250         struct amdgpu_pte_update_params params;
1251         struct dma_fence *f = NULL;
1252         int r;
1253
1254         memset(&params, 0, sizeof(params));
1255         params.adev = adev;
1256         params.vm = vm;
1257
1258         /* sync to everything on unmapping */
1259         if (!(flags & AMDGPU_PTE_VALID))
1260                 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1261
1262         if (vm->use_cpu_for_update) {
1263                 /* params.src is used as flag to indicate system Memory */
1264                 if (pages_addr)
1265                         params.src = ~0;
1266
1267                 /* Wait for PT BOs to be free. PTs share the same resv. object
1268                  * as the root PD BO
1269                  */
1270                 r = amdgpu_vm_wait_pd(adev, vm, owner);
1271                 if (unlikely(r))
1272                         return r;
1273
1274                 params.func = amdgpu_vm_cpu_set_ptes;
1275                 params.pages_addr = pages_addr;
1276                 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1277                                            addr, flags);
1278         }
1279
1280         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1281
1282         nptes = last - start + 1;
1283
1284         /*
1285          * reserve space for two commands every (1 << BLOCK_SIZE)
1286          *  entries or 2k dwords (whatever is smaller)
1287          *
1288          * The second command is for the shadow pagetables.
1289          */
1290         if (vm->root.base.bo->shadow)
1291                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1292         else
1293                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1294
1295         /* padding, etc. */
1296         ndw = 64;
1297
1298         if (pages_addr) {
1299                 /* copy commands needed */
1300                 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1301
1302                 /* and also PTEs */
1303                 ndw += nptes * 2;
1304
1305                 params.func = amdgpu_vm_do_copy_ptes;
1306
1307         } else {
1308                 /* set page commands needed */
1309                 ndw += ncmds * 10;
1310
1311                 /* extra commands for begin/end fragments */
1312                 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1313
1314                 params.func = amdgpu_vm_do_set_ptes;
1315         }
1316
1317         r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1318         if (r)
1319                 return r;
1320
1321         params.ib = &job->ibs[0];
1322
1323         if (pages_addr) {
1324                 uint64_t *pte;
1325                 unsigned i;
1326
1327                 /* Put the PTEs at the end of the IB. */
1328                 i = ndw - nptes * 2;
1329                 pte= (uint64_t *)&(job->ibs->ptr[i]);
1330                 params.src = job->ibs->gpu_addr + i * 4;
1331
1332                 for (i = 0; i < nptes; ++i) {
1333                         pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1334                                                     AMDGPU_GPU_PAGE_SIZE);
1335                         pte[i] |= flags;
1336                 }
1337                 addr = 0;
1338         }
1339
1340         r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1341         if (r)
1342                 goto error_free;
1343
1344         r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1345                              owner, false);
1346         if (r)
1347                 goto error_free;
1348
1349         r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1350         if (r)
1351                 goto error_free;
1352
1353         r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1354         if (r)
1355                 goto error_free;
1356
1357         amdgpu_ring_pad_ib(ring, params.ib);
1358         WARN_ON(params.ib->length_dw > ndw);
1359         r = amdgpu_job_submit(job, ring, &vm->entity,
1360                               AMDGPU_FENCE_OWNER_VM, &f);
1361         if (r)
1362                 goto error_free;
1363
1364         amdgpu_bo_fence(vm->root.base.bo, f, true);
1365         dma_fence_put(*fence);
1366         *fence = f;
1367         return 0;
1368
1369 error_free:
1370         amdgpu_job_free(job);
1371         return r;
1372 }
1373
1374 /**
1375  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1376  *
1377  * @adev: amdgpu_device pointer
1378  * @exclusive: fence we need to sync to
1379  * @pages_addr: DMA addresses to use for mapping
1380  * @vm: requested vm
1381  * @mapping: mapped range and flags to use for the update
1382  * @flags: HW flags for the mapping
1383  * @nodes: array of drm_mm_nodes with the MC addresses
1384  * @fence: optional resulting fence
1385  *
1386  * Split the mapping into smaller chunks so that each update fits
1387  * into a SDMA IB.
1388  * Returns 0 for success, -EINVAL for failure.
1389  */
1390 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1391                                       struct dma_fence *exclusive,
1392                                       dma_addr_t *pages_addr,
1393                                       struct amdgpu_vm *vm,
1394                                       struct amdgpu_bo_va_mapping *mapping,
1395                                       uint64_t flags,
1396                                       struct drm_mm_node *nodes,
1397                                       struct dma_fence **fence)
1398 {
1399         unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
1400         uint64_t pfn, start = mapping->start;
1401         int r;
1402
1403         /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
1404          * but in case of something, we filter the flags in first place
1405          */
1406         if (!(mapping->flags & AMDGPU_PTE_READABLE))
1407                 flags &= ~AMDGPU_PTE_READABLE;
1408         if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1409                 flags &= ~AMDGPU_PTE_WRITEABLE;
1410
1411         flags &= ~AMDGPU_PTE_EXECUTABLE;
1412         flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1413
1414         flags &= ~AMDGPU_PTE_MTYPE_MASK;
1415         flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1416
1417         if ((mapping->flags & AMDGPU_PTE_PRT) &&
1418             (adev->asic_type >= CHIP_VEGA10)) {
1419                 flags |= AMDGPU_PTE_PRT;
1420                 flags &= ~AMDGPU_PTE_VALID;
1421         }
1422
1423         trace_amdgpu_vm_bo_update(mapping);
1424
1425         pfn = mapping->offset >> PAGE_SHIFT;
1426         if (nodes) {
1427                 while (pfn >= nodes->size) {
1428                         pfn -= nodes->size;
1429                         ++nodes;
1430                 }
1431         }
1432
1433         do {
1434                 dma_addr_t *dma_addr = NULL;
1435                 uint64_t max_entries;
1436                 uint64_t addr, last;
1437
1438                 if (nodes) {
1439                         addr = nodes->start << PAGE_SHIFT;
1440                         max_entries = (nodes->size - pfn) *
1441                                 (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1442                 } else {
1443                         addr = 0;
1444                         max_entries = S64_MAX;
1445                 }
1446
1447                 if (pages_addr) {
1448                         uint64_t count;
1449
1450                         max_entries = min(max_entries, 16ull * 1024ull);
1451                         for (count = 1; count < max_entries; ++count) {
1452                                 uint64_t idx = pfn + count;
1453
1454                                 if (pages_addr[idx] !=
1455                                     (pages_addr[idx - 1] + PAGE_SIZE))
1456                                         break;
1457                         }
1458
1459                         if (count < min_linear_pages) {
1460                                 addr = pfn << PAGE_SHIFT;
1461                                 dma_addr = pages_addr;
1462                         } else {
1463                                 addr = pages_addr[pfn];
1464                                 max_entries = count;
1465                         }
1466
1467                 } else if (flags & AMDGPU_PTE_VALID) {
1468                         addr += adev->vm_manager.vram_base_offset;
1469                         addr += pfn << PAGE_SHIFT;
1470                 }
1471
1472                 last = min((uint64_t)mapping->last, start + max_entries - 1);
1473                 r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1474                                                 start, last, flags, addr,
1475                                                 fence);
1476                 if (r)
1477                         return r;
1478
1479                 pfn += last - start + 1;
1480                 if (nodes && nodes->size == pfn) {
1481                         pfn = 0;
1482                         ++nodes;
1483                 }
1484                 start = last + 1;
1485
1486         } while (unlikely(start != mapping->last + 1));
1487
1488         return 0;
1489 }
1490
1491 /**
1492  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1493  *
1494  * @adev: amdgpu_device pointer
1495  * @bo_va: requested BO and VM object
1496  * @clear: if true clear the entries
1497  *
1498  * Fill in the page table entries for @bo_va.
1499  * Returns 0 for success, -EINVAL for failure.
1500  */
1501 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1502                         struct amdgpu_bo_va *bo_va,
1503                         bool clear)
1504 {
1505         struct amdgpu_bo *bo = bo_va->base.bo;
1506         struct amdgpu_vm *vm = bo_va->base.vm;
1507         struct amdgpu_bo_va_mapping *mapping;
1508         dma_addr_t *pages_addr = NULL;
1509         struct ttm_mem_reg *mem;
1510         struct drm_mm_node *nodes;
1511         struct dma_fence *exclusive, **last_update;
1512         uint64_t flags;
1513         int r;
1514
1515         if (clear || !bo_va->base.bo) {
1516                 mem = NULL;
1517                 nodes = NULL;
1518                 exclusive = NULL;
1519         } else {
1520                 struct ttm_dma_tt *ttm;
1521
1522                 mem = &bo_va->base.bo->tbo.mem;
1523                 nodes = mem->mm_node;
1524                 if (mem->mem_type == TTM_PL_TT) {
1525                         ttm = container_of(bo_va->base.bo->tbo.ttm,
1526                                            struct ttm_dma_tt, ttm);
1527                         pages_addr = ttm->dma_address;
1528                 }
1529                 exclusive = reservation_object_get_excl(bo->tbo.resv);
1530         }
1531
1532         if (bo)
1533                 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1534         else
1535                 flags = 0x0;
1536
1537         if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1538                 last_update = &vm->last_update;
1539         else
1540                 last_update = &bo_va->last_pt_update;
1541
1542         if (!clear && bo_va->base.moved) {
1543                 bo_va->base.moved = false;
1544                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1545
1546         } else if (bo_va->cleared != clear) {
1547                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1548         }
1549
1550         list_for_each_entry(mapping, &bo_va->invalids, list) {
1551                 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1552                                                mapping, flags, nodes,
1553                                                last_update);
1554                 if (r)
1555                         return r;
1556         }
1557
1558         if (vm->use_cpu_for_update) {
1559                 /* Flush HDP */
1560                 mb();
1561                 amdgpu_asic_flush_hdp(adev, NULL);
1562         }
1563
1564         spin_lock(&vm->moved_lock);
1565         list_del_init(&bo_va->base.vm_status);
1566         spin_unlock(&vm->moved_lock);
1567
1568         /* If the BO is not in its preferred location add it back to
1569          * the evicted list so that it gets validated again on the
1570          * next command submission.
1571          */
1572         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1573             !(bo->preferred_domains &
1574             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)))
1575                 list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1576
1577         list_splice_init(&bo_va->invalids, &bo_va->valids);
1578         bo_va->cleared = clear;
1579
1580         if (trace_amdgpu_vm_bo_mapping_enabled()) {
1581                 list_for_each_entry(mapping, &bo_va->valids, list)
1582                         trace_amdgpu_vm_bo_mapping(mapping);
1583         }
1584
1585         return 0;
1586 }
1587
1588 /**
1589  * amdgpu_vm_update_prt_state - update the global PRT state
1590  */
1591 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1592 {
1593         unsigned long flags;
1594         bool enable;
1595
1596         spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1597         enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1598         adev->gmc.gmc_funcs->set_prt(adev, enable);
1599         spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1600 }
1601
1602 /**
1603  * amdgpu_vm_prt_get - add a PRT user
1604  */
1605 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1606 {
1607         if (!adev->gmc.gmc_funcs->set_prt)
1608                 return;
1609
1610         if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1611                 amdgpu_vm_update_prt_state(adev);
1612 }
1613
1614 /**
1615  * amdgpu_vm_prt_put - drop a PRT user
1616  */
1617 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1618 {
1619         if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1620                 amdgpu_vm_update_prt_state(adev);
1621 }
1622
1623 /**
1624  * amdgpu_vm_prt_cb - callback for updating the PRT status
1625  */
1626 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1627 {
1628         struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1629
1630         amdgpu_vm_prt_put(cb->adev);
1631         kfree(cb);
1632 }
1633
1634 /**
1635  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1636  */
1637 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1638                                  struct dma_fence *fence)
1639 {
1640         struct amdgpu_prt_cb *cb;
1641
1642         if (!adev->gmc.gmc_funcs->set_prt)
1643                 return;
1644
1645         cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1646         if (!cb) {
1647                 /* Last resort when we are OOM */
1648                 if (fence)
1649                         dma_fence_wait(fence, false);
1650
1651                 amdgpu_vm_prt_put(adev);
1652         } else {
1653                 cb->adev = adev;
1654                 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1655                                                      amdgpu_vm_prt_cb))
1656                         amdgpu_vm_prt_cb(fence, &cb->cb);
1657         }
1658 }
1659
1660 /**
1661  * amdgpu_vm_free_mapping - free a mapping
1662  *
1663  * @adev: amdgpu_device pointer
1664  * @vm: requested vm
1665  * @mapping: mapping to be freed
1666  * @fence: fence of the unmap operation
1667  *
1668  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1669  */
1670 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1671                                    struct amdgpu_vm *vm,
1672                                    struct amdgpu_bo_va_mapping *mapping,
1673                                    struct dma_fence *fence)
1674 {
1675         if (mapping->flags & AMDGPU_PTE_PRT)
1676                 amdgpu_vm_add_prt_cb(adev, fence);
1677         kfree(mapping);
1678 }
1679
1680 /**
1681  * amdgpu_vm_prt_fini - finish all prt mappings
1682  *
1683  * @adev: amdgpu_device pointer
1684  * @vm: requested vm
1685  *
1686  * Register a cleanup callback to disable PRT support after VM dies.
1687  */
1688 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1689 {
1690         struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1691         struct dma_fence *excl, **shared;
1692         unsigned i, shared_count;
1693         int r;
1694
1695         r = reservation_object_get_fences_rcu(resv, &excl,
1696                                               &shared_count, &shared);
1697         if (r) {
1698                 /* Not enough memory to grab the fence list, as last resort
1699                  * block for all the fences to complete.
1700                  */
1701                 reservation_object_wait_timeout_rcu(resv, true, false,
1702                                                     MAX_SCHEDULE_TIMEOUT);
1703                 return;
1704         }
1705
1706         /* Add a callback for each fence in the reservation object */
1707         amdgpu_vm_prt_get(adev);
1708         amdgpu_vm_add_prt_cb(adev, excl);
1709
1710         for (i = 0; i < shared_count; ++i) {
1711                 amdgpu_vm_prt_get(adev);
1712                 amdgpu_vm_add_prt_cb(adev, shared[i]);
1713         }
1714
1715         kfree(shared);
1716 }
1717
1718 /**
1719  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1720  *
1721  * @adev: amdgpu_device pointer
1722  * @vm: requested vm
1723  * @fence: optional resulting fence (unchanged if no work needed to be done
1724  * or if an error occurred)
1725  *
1726  * Make sure all freed BOs are cleared in the PT.
1727  * Returns 0 for success.
1728  *
1729  * PTs have to be reserved and mutex must be locked!
1730  */
1731 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1732                           struct amdgpu_vm *vm,
1733                           struct dma_fence **fence)
1734 {
1735         struct amdgpu_bo_va_mapping *mapping;
1736         uint64_t init_pte_value = 0;
1737         struct dma_fence *f = NULL;
1738         int r;
1739
1740         while (!list_empty(&vm->freed)) {
1741                 mapping = list_first_entry(&vm->freed,
1742                         struct amdgpu_bo_va_mapping, list);
1743                 list_del(&mapping->list);
1744
1745                 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1746                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1747
1748                 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
1749                                                 mapping->start, mapping->last,
1750                                                 init_pte_value, 0, &f);
1751                 amdgpu_vm_free_mapping(adev, vm, mapping, f);
1752                 if (r) {
1753                         dma_fence_put(f);
1754                         return r;
1755                 }
1756         }
1757
1758         if (fence && f) {
1759                 dma_fence_put(*fence);
1760                 *fence = f;
1761         } else {
1762                 dma_fence_put(f);
1763         }
1764
1765         return 0;
1766
1767 }
1768
1769 /**
1770  * amdgpu_vm_handle_moved - handle moved BOs in the PT
1771  *
1772  * @adev: amdgpu_device pointer
1773  * @vm: requested vm
1774  * @sync: sync object to add fences to
1775  *
1776  * Make sure all BOs which are moved are updated in the PTs.
1777  * Returns 0 for success.
1778  *
1779  * PTs have to be reserved!
1780  */
1781 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1782                            struct amdgpu_vm *vm)
1783 {
1784         bool clear;
1785         int r = 0;
1786
1787         spin_lock(&vm->moved_lock);
1788         while (!list_empty(&vm->moved)) {
1789                 struct amdgpu_bo_va *bo_va;
1790                 struct reservation_object *resv;
1791
1792                 bo_va = list_first_entry(&vm->moved,
1793                         struct amdgpu_bo_va, base.vm_status);
1794                 spin_unlock(&vm->moved_lock);
1795
1796                 resv = bo_va->base.bo->tbo.resv;
1797
1798                 /* Per VM BOs never need to bo cleared in the page tables */
1799                 if (resv == vm->root.base.bo->tbo.resv)
1800                         clear = false;
1801                 /* Try to reserve the BO to avoid clearing its ptes */
1802                 else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
1803                         clear = false;
1804                 /* Somebody else is using the BO right now */
1805                 else
1806                         clear = true;
1807
1808                 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1809                 if (r)
1810                         return r;
1811
1812                 if (!clear && resv != vm->root.base.bo->tbo.resv)
1813                         reservation_object_unlock(resv);
1814
1815                 spin_lock(&vm->moved_lock);
1816         }
1817         spin_unlock(&vm->moved_lock);
1818
1819         return r;
1820 }
1821
1822 /**
1823  * amdgpu_vm_bo_add - add a bo to a specific vm
1824  *
1825  * @adev: amdgpu_device pointer
1826  * @vm: requested vm
1827  * @bo: amdgpu buffer object
1828  *
1829  * Add @bo into the requested vm.
1830  * Add @bo to the list of bos associated with the vm
1831  * Returns newly added bo_va or NULL for failure
1832  *
1833  * Object has to be reserved!
1834  */
1835 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1836                                       struct amdgpu_vm *vm,
1837                                       struct amdgpu_bo *bo)
1838 {
1839         struct amdgpu_bo_va *bo_va;
1840
1841         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1842         if (bo_va == NULL) {
1843                 return NULL;
1844         }
1845         amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1846
1847         bo_va->ref_count = 1;
1848         INIT_LIST_HEAD(&bo_va->valids);
1849         INIT_LIST_HEAD(&bo_va->invalids);
1850
1851         return bo_va;
1852 }
1853
1854
1855 /**
1856  * amdgpu_vm_bo_insert_mapping - insert a new mapping
1857  *
1858  * @adev: amdgpu_device pointer
1859  * @bo_va: bo_va to store the address
1860  * @mapping: the mapping to insert
1861  *
1862  * Insert a new mapping into all structures.
1863  */
1864 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1865                                     struct amdgpu_bo_va *bo_va,
1866                                     struct amdgpu_bo_va_mapping *mapping)
1867 {
1868         struct amdgpu_vm *vm = bo_va->base.vm;
1869         struct amdgpu_bo *bo = bo_va->base.bo;
1870
1871         mapping->bo_va = bo_va;
1872         list_add(&mapping->list, &bo_va->invalids);
1873         amdgpu_vm_it_insert(mapping, &vm->va);
1874
1875         if (mapping->flags & AMDGPU_PTE_PRT)
1876                 amdgpu_vm_prt_get(adev);
1877
1878         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1879                 spin_lock(&vm->moved_lock);
1880                 if (list_empty(&bo_va->base.vm_status))
1881                         list_add(&bo_va->base.vm_status, &vm->moved);
1882                 spin_unlock(&vm->moved_lock);
1883         }
1884         trace_amdgpu_vm_bo_map(bo_va, mapping);
1885 }
1886
1887 /**
1888  * amdgpu_vm_bo_map - map bo inside a vm
1889  *
1890  * @adev: amdgpu_device pointer
1891  * @bo_va: bo_va to store the address
1892  * @saddr: where to map the BO
1893  * @offset: requested offset in the BO
1894  * @flags: attributes of pages (read/write/valid/etc.)
1895  *
1896  * Add a mapping of the BO at the specefied addr into the VM.
1897  * Returns 0 for success, error for failure.
1898  *
1899  * Object has to be reserved and unreserved outside!
1900  */
1901 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1902                      struct amdgpu_bo_va *bo_va,
1903                      uint64_t saddr, uint64_t offset,
1904                      uint64_t size, uint64_t flags)
1905 {
1906         struct amdgpu_bo_va_mapping *mapping, *tmp;
1907         struct amdgpu_bo *bo = bo_va->base.bo;
1908         struct amdgpu_vm *vm = bo_va->base.vm;
1909         uint64_t eaddr;
1910
1911         /* validate the parameters */
1912         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1913             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1914                 return -EINVAL;
1915
1916         /* make sure object fit at this offset */
1917         eaddr = saddr + size - 1;
1918         if (saddr >= eaddr ||
1919             (bo && offset + size > amdgpu_bo_size(bo)))
1920                 return -EINVAL;
1921
1922         saddr /= AMDGPU_GPU_PAGE_SIZE;
1923         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1924
1925         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1926         if (tmp) {
1927                 /* bo and tmp overlap, invalid addr */
1928                 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1929                         "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1930                         tmp->start, tmp->last + 1);
1931                 return -EINVAL;
1932         }
1933
1934         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1935         if (!mapping)
1936                 return -ENOMEM;
1937
1938         mapping->start = saddr;
1939         mapping->last = eaddr;
1940         mapping->offset = offset;
1941         mapping->flags = flags;
1942
1943         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1944
1945         return 0;
1946 }
1947
1948 /**
1949  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1950  *
1951  * @adev: amdgpu_device pointer
1952  * @bo_va: bo_va to store the address
1953  * @saddr: where to map the BO
1954  * @offset: requested offset in the BO
1955  * @flags: attributes of pages (read/write/valid/etc.)
1956  *
1957  * Add a mapping of the BO at the specefied addr into the VM. Replace existing
1958  * mappings as we do so.
1959  * Returns 0 for success, error for failure.
1960  *
1961  * Object has to be reserved and unreserved outside!
1962  */
1963 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1964                              struct amdgpu_bo_va *bo_va,
1965                              uint64_t saddr, uint64_t offset,
1966                              uint64_t size, uint64_t flags)
1967 {
1968         struct amdgpu_bo_va_mapping *mapping;
1969         struct amdgpu_bo *bo = bo_va->base.bo;
1970         uint64_t eaddr;
1971         int r;
1972
1973         /* validate the parameters */
1974         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1975             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1976                 return -EINVAL;
1977
1978         /* make sure object fit at this offset */
1979         eaddr = saddr + size - 1;
1980         if (saddr >= eaddr ||
1981             (bo && offset + size > amdgpu_bo_size(bo)))
1982                 return -EINVAL;
1983
1984         /* Allocate all the needed memory */
1985         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1986         if (!mapping)
1987                 return -ENOMEM;
1988
1989         r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1990         if (r) {
1991                 kfree(mapping);
1992                 return r;
1993         }
1994
1995         saddr /= AMDGPU_GPU_PAGE_SIZE;
1996         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1997
1998         mapping->start = saddr;
1999         mapping->last = eaddr;
2000         mapping->offset = offset;
2001         mapping->flags = flags;
2002
2003         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2004
2005         return 0;
2006 }
2007
2008 /**
2009  * amdgpu_vm_bo_unmap - remove bo mapping from vm
2010  *
2011  * @adev: amdgpu_device pointer
2012  * @bo_va: bo_va to remove the address from
2013  * @saddr: where to the BO is mapped
2014  *
2015  * Remove a mapping of the BO at the specefied addr from the VM.
2016  * Returns 0 for success, error for failure.
2017  *
2018  * Object has to be reserved and unreserved outside!
2019  */
2020 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2021                        struct amdgpu_bo_va *bo_va,
2022                        uint64_t saddr)
2023 {
2024         struct amdgpu_bo_va_mapping *mapping;
2025         struct amdgpu_vm *vm = bo_va->base.vm;
2026         bool valid = true;
2027
2028         saddr /= AMDGPU_GPU_PAGE_SIZE;
2029
2030         list_for_each_entry(mapping, &bo_va->valids, list) {
2031                 if (mapping->start == saddr)
2032                         break;
2033         }
2034
2035         if (&mapping->list == &bo_va->valids) {
2036                 valid = false;
2037
2038                 list_for_each_entry(mapping, &bo_va->invalids, list) {
2039                         if (mapping->start == saddr)
2040                                 break;
2041                 }
2042
2043                 if (&mapping->list == &bo_va->invalids)
2044                         return -ENOENT;
2045         }
2046
2047         list_del(&mapping->list);
2048         amdgpu_vm_it_remove(mapping, &vm->va);
2049         mapping->bo_va = NULL;
2050         trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2051
2052         if (valid)
2053                 list_add(&mapping->list, &vm->freed);
2054         else
2055                 amdgpu_vm_free_mapping(adev, vm, mapping,
2056                                        bo_va->last_pt_update);
2057
2058         return 0;
2059 }
2060
2061 /**
2062  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2063  *
2064  * @adev: amdgpu_device pointer
2065  * @vm: VM structure to use
2066  * @saddr: start of the range
2067  * @size: size of the range
2068  *
2069  * Remove all mappings in a range, split them as appropriate.
2070  * Returns 0 for success, error for failure.
2071  */
2072 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2073                                 struct amdgpu_vm *vm,
2074                                 uint64_t saddr, uint64_t size)
2075 {
2076         struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2077         LIST_HEAD(removed);
2078         uint64_t eaddr;
2079
2080         eaddr = saddr + size - 1;
2081         saddr /= AMDGPU_GPU_PAGE_SIZE;
2082         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2083
2084         /* Allocate all the needed memory */
2085         before = kzalloc(sizeof(*before), GFP_KERNEL);
2086         if (!before)
2087                 return -ENOMEM;
2088         INIT_LIST_HEAD(&before->list);
2089
2090         after = kzalloc(sizeof(*after), GFP_KERNEL);
2091         if (!after) {
2092                 kfree(before);
2093                 return -ENOMEM;
2094         }
2095         INIT_LIST_HEAD(&after->list);
2096
2097         /* Now gather all removed mappings */
2098         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2099         while (tmp) {
2100                 /* Remember mapping split at the start */
2101                 if (tmp->start < saddr) {
2102                         before->start = tmp->start;
2103                         before->last = saddr - 1;
2104                         before->offset = tmp->offset;
2105                         before->flags = tmp->flags;
2106                         list_add(&before->list, &tmp->list);
2107                 }
2108
2109                 /* Remember mapping split at the end */
2110                 if (tmp->last > eaddr) {
2111                         after->start = eaddr + 1;
2112                         after->last = tmp->last;
2113                         after->offset = tmp->offset;
2114                         after->offset += after->start - tmp->start;
2115                         after->flags = tmp->flags;
2116                         list_add(&after->list, &tmp->list);
2117                 }
2118
2119                 list_del(&tmp->list);
2120                 list_add(&tmp->list, &removed);
2121
2122                 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2123         }
2124
2125         /* And free them up */
2126         list_for_each_entry_safe(tmp, next, &removed, list) {
2127                 amdgpu_vm_it_remove(tmp, &vm->va);
2128                 list_del(&tmp->list);
2129
2130                 if (tmp->start < saddr)
2131                     tmp->start = saddr;
2132                 if (tmp->last > eaddr)
2133                     tmp->last = eaddr;
2134
2135                 tmp->bo_va = NULL;
2136                 list_add(&tmp->list, &vm->freed);
2137                 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2138         }
2139
2140         /* Insert partial mapping before the range */
2141         if (!list_empty(&before->list)) {
2142                 amdgpu_vm_it_insert(before, &vm->va);
2143                 if (before->flags & AMDGPU_PTE_PRT)
2144                         amdgpu_vm_prt_get(adev);
2145         } else {
2146                 kfree(before);
2147         }
2148
2149         /* Insert partial mapping after the range */
2150         if (!list_empty(&after->list)) {
2151                 amdgpu_vm_it_insert(after, &vm->va);
2152                 if (after->flags & AMDGPU_PTE_PRT)
2153                         amdgpu_vm_prt_get(adev);
2154         } else {
2155                 kfree(after);
2156         }
2157
2158         return 0;
2159 }
2160
2161 /**
2162  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2163  *
2164  * @vm: the requested VM
2165  *
2166  * Find a mapping by it's address.
2167  */
2168 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2169                                                          uint64_t addr)
2170 {
2171         return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2172 }
2173
2174 /**
2175  * amdgpu_vm_bo_rmv - remove a bo to a specific vm
2176  *
2177  * @adev: amdgpu_device pointer
2178  * @bo_va: requested bo_va
2179  *
2180  * Remove @bo_va->bo from the requested vm.
2181  *
2182  * Object have to be reserved!
2183  */
2184 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2185                       struct amdgpu_bo_va *bo_va)
2186 {
2187         struct amdgpu_bo_va_mapping *mapping, *next;
2188         struct amdgpu_vm *vm = bo_va->base.vm;
2189
2190         list_del(&bo_va->base.bo_list);
2191
2192         spin_lock(&vm->moved_lock);
2193         list_del(&bo_va->base.vm_status);
2194         spin_unlock(&vm->moved_lock);
2195
2196         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2197                 list_del(&mapping->list);
2198                 amdgpu_vm_it_remove(mapping, &vm->va);
2199                 mapping->bo_va = NULL;
2200                 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2201                 list_add(&mapping->list, &vm->freed);
2202         }
2203         list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2204                 list_del(&mapping->list);
2205                 amdgpu_vm_it_remove(mapping, &vm->va);
2206                 amdgpu_vm_free_mapping(adev, vm, mapping,
2207                                        bo_va->last_pt_update);
2208         }
2209
2210         dma_fence_put(bo_va->last_pt_update);
2211         kfree(bo_va);
2212 }
2213
2214 /**
2215  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2216  *
2217  * @adev: amdgpu_device pointer
2218  * @vm: requested vm
2219  * @bo: amdgpu buffer object
2220  *
2221  * Mark @bo as invalid.
2222  */
2223 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2224                              struct amdgpu_bo *bo, bool evicted)
2225 {
2226         struct amdgpu_vm_bo_base *bo_base;
2227
2228         /* shadow bo doesn't have bo base, its validation needs its parent */
2229         if (bo->parent && bo->parent->shadow == bo)
2230                 bo = bo->parent;
2231
2232         list_for_each_entry(bo_base, &bo->va, bo_list) {
2233                 struct amdgpu_vm *vm = bo_base->vm;
2234
2235                 bo_base->moved = true;
2236                 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2237                         if (bo->tbo.type == ttm_bo_type_kernel)
2238                                 list_move(&bo_base->vm_status, &vm->evicted);
2239                         else
2240                                 list_move_tail(&bo_base->vm_status,
2241                                                &vm->evicted);
2242                         continue;
2243                 }
2244
2245                 if (bo->tbo.type == ttm_bo_type_kernel) {
2246                         if (list_empty(&bo_base->vm_status))
2247                                 list_add(&bo_base->vm_status, &vm->relocated);
2248                         continue;
2249                 }
2250
2251                 spin_lock(&bo_base->vm->moved_lock);
2252                 if (list_empty(&bo_base->vm_status))
2253                         list_add(&bo_base->vm_status, &vm->moved);
2254                 spin_unlock(&bo_base->vm->moved_lock);
2255         }
2256 }
2257
2258 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2259 {
2260         /* Total bits covered by PD + PTs */
2261         unsigned bits = ilog2(vm_size) + 18;
2262
2263         /* Make sure the PD is 4K in size up to 8GB address space.
2264            Above that split equal between PD and PTs */
2265         if (vm_size <= 8)
2266                 return (bits - 9);
2267         else
2268                 return ((bits + 3) / 2);
2269 }
2270
2271 /**
2272  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2273  *
2274  * @adev: amdgpu_device pointer
2275  * @vm_size: the default vm size if it's set auto
2276  */
2277 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2278                            uint32_t fragment_size_default, unsigned max_level,
2279                            unsigned max_bits)
2280 {
2281         uint64_t tmp;
2282
2283         /* adjust vm size first */
2284         if (amdgpu_vm_size != -1) {
2285                 unsigned max_size = 1 << (max_bits - 30);
2286
2287                 vm_size = amdgpu_vm_size;
2288                 if (vm_size > max_size) {
2289                         dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2290                                  amdgpu_vm_size, max_size);
2291                         vm_size = max_size;
2292                 }
2293         }
2294
2295         adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2296
2297         tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2298         if (amdgpu_vm_block_size != -1)
2299                 tmp >>= amdgpu_vm_block_size - 9;
2300         tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2301         adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2302         switch (adev->vm_manager.num_level) {
2303         case 3:
2304                 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2305                 break;
2306         case 2:
2307                 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2308                 break;
2309         case 1:
2310                 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2311                 break;
2312         default:
2313                 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2314         }
2315         /* block size depends on vm size and hw setup*/
2316         if (amdgpu_vm_block_size != -1)
2317                 adev->vm_manager.block_size =
2318                         min((unsigned)amdgpu_vm_block_size, max_bits
2319                             - AMDGPU_GPU_PAGE_SHIFT
2320                             - 9 * adev->vm_manager.num_level);
2321         else if (adev->vm_manager.num_level > 1)
2322                 adev->vm_manager.block_size = 9;
2323         else
2324                 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2325
2326         if (amdgpu_vm_fragment_size == -1)
2327                 adev->vm_manager.fragment_size = fragment_size_default;
2328         else
2329                 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2330
2331         DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2332                  vm_size, adev->vm_manager.num_level + 1,
2333                  adev->vm_manager.block_size,
2334                  adev->vm_manager.fragment_size);
2335 }
2336
2337 /**
2338  * amdgpu_vm_init - initialize a vm instance
2339  *
2340  * @adev: amdgpu_device pointer
2341  * @vm: requested vm
2342  * @vm_context: Indicates if it GFX or Compute context
2343  *
2344  * Init @vm fields.
2345  */
2346 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2347                    int vm_context, unsigned int pasid)
2348 {
2349         struct amdgpu_bo_param bp;
2350         struct amdgpu_bo *root;
2351         const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2352                 AMDGPU_VM_PTE_COUNT(adev) * 8);
2353         unsigned ring_instance;
2354         struct amdgpu_ring *ring;
2355         struct drm_sched_rq *rq;
2356         unsigned long size;
2357         uint64_t flags;
2358         int r, i;
2359
2360         vm->va = RB_ROOT_CACHED;
2361         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2362                 vm->reserved_vmid[i] = NULL;
2363         INIT_LIST_HEAD(&vm->evicted);
2364         INIT_LIST_HEAD(&vm->relocated);
2365         spin_lock_init(&vm->moved_lock);
2366         INIT_LIST_HEAD(&vm->moved);
2367         INIT_LIST_HEAD(&vm->freed);
2368
2369         /* create scheduler entity for page table updates */
2370
2371         ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2372         ring_instance %= adev->vm_manager.vm_pte_num_rings;
2373         ring = adev->vm_manager.vm_pte_rings[ring_instance];
2374         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2375         r = drm_sched_entity_init(&ring->sched, &vm->entity,
2376                                   rq, NULL);
2377         if (r)
2378                 return r;
2379
2380         vm->pte_support_ats = false;
2381
2382         if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2383                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2384                                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2385
2386                 if (adev->asic_type == CHIP_RAVEN)
2387                         vm->pte_support_ats = true;
2388         } else {
2389                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2390                                                 AMDGPU_VM_USE_CPU_FOR_GFX);
2391         }
2392         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2393                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2394         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2395                   "CPU update of VM recommended only for large BAR system\n");
2396         vm->last_update = NULL;
2397
2398         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2399         if (vm->use_cpu_for_update)
2400                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2401         else
2402                 flags |= AMDGPU_GEM_CREATE_SHADOW;
2403
2404         size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2405         memset(&bp, 0, sizeof(bp));
2406         bp.size = size;
2407         bp.byte_align = align;
2408         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2409         bp.flags = flags;
2410         bp.type = ttm_bo_type_kernel;
2411         bp.resv = NULL;
2412         r = amdgpu_bo_create(adev, &bp, &root);
2413         if (r)
2414                 goto error_free_sched_entity;
2415
2416         r = amdgpu_bo_reserve(root, true);
2417         if (r)
2418                 goto error_free_root;
2419
2420         r = amdgpu_vm_clear_bo(adev, vm, root,
2421                                adev->vm_manager.root_level,
2422                                vm->pte_support_ats);
2423         if (r)
2424                 goto error_unreserve;
2425
2426         amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2427         amdgpu_bo_unreserve(vm->root.base.bo);
2428
2429         if (pasid) {
2430                 unsigned long flags;
2431
2432                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2433                 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2434                               GFP_ATOMIC);
2435                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2436                 if (r < 0)
2437                         goto error_free_root;
2438
2439                 vm->pasid = pasid;
2440         }
2441
2442         INIT_KFIFO(vm->faults);
2443         vm->fault_credit = 16;
2444
2445         return 0;
2446
2447 error_unreserve:
2448         amdgpu_bo_unreserve(vm->root.base.bo);
2449
2450 error_free_root:
2451         amdgpu_bo_unref(&vm->root.base.bo->shadow);
2452         amdgpu_bo_unref(&vm->root.base.bo);
2453         vm->root.base.bo = NULL;
2454
2455 error_free_sched_entity:
2456         drm_sched_entity_fini(&ring->sched, &vm->entity);
2457
2458         return r;
2459 }
2460
2461 /**
2462  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2463  *
2464  * This only works on GFX VMs that don't have any BOs added and no
2465  * page tables allocated yet.
2466  *
2467  * Changes the following VM parameters:
2468  * - use_cpu_for_update
2469  * - pte_supports_ats
2470  * - pasid (old PASID is released, because compute manages its own PASIDs)
2471  *
2472  * Reinitializes the page directory to reflect the changed ATS
2473  * setting. May leave behind an unused shadow BO for the page
2474  * directory when switching from SDMA updates to CPU updates.
2475  *
2476  * Returns 0 for success, -errno for errors.
2477  */
2478 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2479 {
2480         bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2481         int r;
2482
2483         r = amdgpu_bo_reserve(vm->root.base.bo, true);
2484         if (r)
2485                 return r;
2486
2487         /* Sanity checks */
2488         if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2489                 r = -EINVAL;
2490                 goto error;
2491         }
2492
2493         /* Check if PD needs to be reinitialized and do it before
2494          * changing any other state, in case it fails.
2495          */
2496         if (pte_support_ats != vm->pte_support_ats) {
2497                 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2498                                adev->vm_manager.root_level,
2499                                pte_support_ats);
2500                 if (r)
2501                         goto error;
2502         }
2503
2504         /* Update VM state */
2505         vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2506                                     AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2507         vm->pte_support_ats = pte_support_ats;
2508         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2509                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2510         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2511                   "CPU update of VM recommended only for large BAR system\n");
2512
2513         if (vm->pasid) {
2514                 unsigned long flags;
2515
2516                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2517                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2518                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2519
2520                 vm->pasid = 0;
2521         }
2522
2523 error:
2524         amdgpu_bo_unreserve(vm->root.base.bo);
2525         return r;
2526 }
2527
2528 /**
2529  * amdgpu_vm_free_levels - free PD/PT levels
2530  *
2531  * @adev: amdgpu device structure
2532  * @parent: PD/PT starting level to free
2533  * @level: level of parent structure
2534  *
2535  * Free the page directory or page table level and all sub levels.
2536  */
2537 static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2538                                   struct amdgpu_vm_pt *parent,
2539                                   unsigned level)
2540 {
2541         unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
2542
2543         if (parent->base.bo) {
2544                 list_del(&parent->base.bo_list);
2545                 list_del(&parent->base.vm_status);
2546                 amdgpu_bo_unref(&parent->base.bo->shadow);
2547                 amdgpu_bo_unref(&parent->base.bo);
2548         }
2549
2550         if (parent->entries)
2551                 for (i = 0; i < num_entries; i++)
2552                         amdgpu_vm_free_levels(adev, &parent->entries[i],
2553                                               level + 1);
2554
2555         kvfree(parent->entries);
2556 }
2557
2558 /**
2559  * amdgpu_vm_fini - tear down a vm instance
2560  *
2561  * @adev: amdgpu_device pointer
2562  * @vm: requested vm
2563  *
2564  * Tear down @vm.
2565  * Unbind the VM and remove all bos from the vm bo list
2566  */
2567 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2568 {
2569         struct amdgpu_bo_va_mapping *mapping, *tmp;
2570         bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2571         struct amdgpu_bo *root;
2572         u64 fault;
2573         int i, r;
2574
2575         amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2576
2577         /* Clear pending page faults from IH when the VM is destroyed */
2578         while (kfifo_get(&vm->faults, &fault))
2579                 amdgpu_ih_clear_fault(adev, fault);
2580
2581         if (vm->pasid) {
2582                 unsigned long flags;
2583
2584                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2585                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2586                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2587         }
2588
2589         drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2590
2591         if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2592                 dev_err(adev->dev, "still active bo inside vm\n");
2593         }
2594         rbtree_postorder_for_each_entry_safe(mapping, tmp,
2595                                              &vm->va.rb_root, rb) {
2596                 list_del(&mapping->list);
2597                 amdgpu_vm_it_remove(mapping, &vm->va);
2598                 kfree(mapping);
2599         }
2600         list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2601                 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2602                         amdgpu_vm_prt_fini(adev, vm);
2603                         prt_fini_needed = false;
2604                 }
2605
2606                 list_del(&mapping->list);
2607                 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2608         }
2609
2610         root = amdgpu_bo_ref(vm->root.base.bo);
2611         r = amdgpu_bo_reserve(root, true);
2612         if (r) {
2613                 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2614         } else {
2615                 amdgpu_vm_free_levels(adev, &vm->root,
2616                                       adev->vm_manager.root_level);
2617                 amdgpu_bo_unreserve(root);
2618         }
2619         amdgpu_bo_unref(&root);
2620         dma_fence_put(vm->last_update);
2621         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2622                 amdgpu_vmid_free_reserved(adev, vm, i);
2623 }
2624
2625 /**
2626  * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2627  *
2628  * @adev: amdgpu_device pointer
2629  * @pasid: PASID do identify the VM
2630  *
2631  * This function is expected to be called in interrupt context. Returns
2632  * true if there was fault credit, false otherwise
2633  */
2634 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2635                                   unsigned int pasid)
2636 {
2637         struct amdgpu_vm *vm;
2638
2639         spin_lock(&adev->vm_manager.pasid_lock);
2640         vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2641         if (!vm) {
2642                 /* VM not found, can't track fault credit */
2643                 spin_unlock(&adev->vm_manager.pasid_lock);
2644                 return true;
2645         }
2646
2647         /* No lock needed. only accessed by IRQ handler */
2648         if (!vm->fault_credit) {
2649                 /* Too many faults in this VM */
2650                 spin_unlock(&adev->vm_manager.pasid_lock);
2651                 return false;
2652         }
2653
2654         vm->fault_credit--;
2655         spin_unlock(&adev->vm_manager.pasid_lock);
2656         return true;
2657 }
2658
2659 /**
2660  * amdgpu_vm_manager_init - init the VM manager
2661  *
2662  * @adev: amdgpu_device pointer
2663  *
2664  * Initialize the VM manager structures
2665  */
2666 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2667 {
2668         unsigned i;
2669
2670         amdgpu_vmid_mgr_init(adev);
2671
2672         adev->vm_manager.fence_context =
2673                 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2674         for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2675                 adev->vm_manager.seqno[i] = 0;
2676
2677         atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2678         spin_lock_init(&adev->vm_manager.prt_lock);
2679         atomic_set(&adev->vm_manager.num_prt_users, 0);
2680
2681         /* If not overridden by the user, by default, only in large BAR systems
2682          * Compute VM tables will be updated by CPU
2683          */
2684 #ifdef CONFIG_X86_64
2685         if (amdgpu_vm_update_mode == -1) {
2686                 if (amdgpu_vm_is_large_bar(adev))
2687                         adev->vm_manager.vm_update_mode =
2688                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2689                 else
2690                         adev->vm_manager.vm_update_mode = 0;
2691         } else
2692                 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2693 #else
2694         adev->vm_manager.vm_update_mode = 0;
2695 #endif
2696
2697         idr_init(&adev->vm_manager.pasid_idr);
2698         spin_lock_init(&adev->vm_manager.pasid_lock);
2699 }
2700
2701 /**
2702  * amdgpu_vm_manager_fini - cleanup VM manager
2703  *
2704  * @adev: amdgpu_device pointer
2705  *
2706  * Cleanup the VM manager and free resources.
2707  */
2708 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2709 {
2710         WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2711         idr_destroy(&adev->vm_manager.pasid_idr);
2712
2713         amdgpu_vmid_mgr_fini(adev);
2714 }
2715
2716 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2717 {
2718         union drm_amdgpu_vm *args = data;
2719         struct amdgpu_device *adev = dev->dev_private;
2720         struct amdgpu_fpriv *fpriv = filp->driver_priv;
2721         int r;
2722
2723         switch (args->in.op) {
2724         case AMDGPU_VM_OP_RESERVE_VMID:
2725                 /* current, we only have requirement to reserve vmid from gfxhub */
2726                 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2727                 if (r)
2728                         return r;
2729                 break;
2730         case AMDGPU_VM_OP_UNRESERVE_VMID:
2731                 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2732                 break;
2733         default:
2734                 return -EINVAL;
2735         }
2736
2737         return 0;
2738 }