Merge tag 'amd-drm-next-6.10-2024-04-26' of https://gitlab.freedesktop.org/agd5f...
author Dave Airlie <[email protected]>
Tue, 30 Apr 2024 04:42:54 +0000 (14:42 +1000)
committer Dave Airlie <[email protected]>
Tue, 30 Apr 2024 04:43:00 +0000 (14:43 +1000)
amd-drm-next-6.10-2024-04-26:

amdgpu:
- Misc code cleanups and refactors
- Support setting reset method at runtime
- Report OD status
- SMU 14.0.1 fixes
- SDMA 4.4.2 fixes
- VPE fixes
- MES fixes
- Update BO eviction priorities
- UMSCH fixes
- Reset fixes
- Freesync fixes
- GFXIP 9.4.3 fixes
- SDMA 5.2 fixes
- MES UAF fix
- RAS updates
- Devcoredump updates for dumping IP state
- DSC fixes
- JPEG fix
- Fix VRAM memory accounting
- VCN 5.0 fixes
- MES fixes
- UMC 12.0 updates
- Modify contiguous flags handling
- Initial support for mapping kernel queues via MES

amdkfd:
- Fix rescheduling of restore worker
- VRAM accounting for SVM migrations
- mGPU fix
- Enable SQ watchpoint for gfx10

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
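One thread running through the amdgpu_ttm.c and amdgpu_ttm.h hunks below is an API change: amdgpu_copy_buffer() and amdgpu_emit_copy_buffer() drop their bool tmz argument in favor of a uint32_t copy_flags bitmask, with AMDGPU_COPY_FLAGS_TMZ as the first bit. A minimal userspace C sketch of the pattern follows; copy_one() and its body are illustrative stand-ins, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* First (and so far only) flag, mirroring the amdgpu_ttm.h hunk below. */
#define AMDGPU_COPY_FLAGS_TMZ	(1 << 0)

/* Before: one dedicated bool per option, so every new option changes the
 * signature.  After: a single bitmask that can grow without churn. */
static void copy_one(uint64_t src, uint64_t dst, uint32_t size,
		     uint32_t copy_flags)
{
	bool tmz = copy_flags & AMDGPU_COPY_FLAGS_TMZ;

	printf("copy %u bytes 0x%llx -> 0x%llx%s\n", (unsigned)size,
	       (unsigned long long)src, (unsigned long long)dst,
	       tmz ? " [TMZ]" : "");
}

int main(void)
{
	uint32_t copy_flags = 0;

	copy_flags |= AMDGPU_COPY_FLAGS_TMZ;	/* as in amdgpu_ttm_copy_mem_to_mem() */
	copy_one(0x1000, 0x2000, 4096, copy_flags);
	return 0;
}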
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 83c499fda14cc12396c598d089aa2a85ca4f5ba6,706345ea14301c0a349a47af1f0796f41a20023d..b2a83c802bbd764593467cee1f5aaab32281fd0a
@@@ -39,7 -39,6 +39,7 @@@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
 +#include "amdgpu_vram_mgr.h"
  
  /**
   * DOC: amdgpu_object
@@@ -154,8 -153,10 +154,10 @@@ void amdgpu_bo_placement_from_domain(st
                else
                        places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  
-               if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+               if (abo->tbo.type == ttm_bo_type_kernel &&
+                   flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
                        places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
                c++;
        }
  
@@@ -602,7 -603,8 +604,7 @@@ int amdgpu_bo_create(struct amdgpu_devi
        if (!amdgpu_bo_support_uswc(bo->flags))
                bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  
 -      if (adev->ras_enabled)
 -              bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 +      bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
  
        bo->tbo.bdev = &adev->mman.bdev;
        if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
            bo->tbo.resource->mem_type == TTM_PL_VRAM) {
                struct dma_fence *fence;
  
 -              r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
 +              r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
                if (unlikely(r))
                        goto fail_unreserve;
  
@@@ -765,7 -767,7 +767,7 @@@ int amdgpu_bo_restore_shadow(struct amd
  
        return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
                                  amdgpu_bo_size(shadow), NULL, fence,
-                                 true, false, false);
+                                 true, false, 0);
  }
  
  /**
@@@ -967,6 -969,10 +969,10 @@@ int amdgpu_bo_pin_restricted(struct amd
                if (!bo->placements[i].lpfn ||
                    (lpfn && lpfn < bo->placements[i].lpfn))
                        bo->placements[i].lpfn = lpfn;
+               if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+                   bo->placements[i].mem_type == TTM_PL_VRAM)
+                       bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
        }
  
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@@ -1368,9 -1374,8 +1374,9 @@@ void amdgpu_bo_release_notify(struct tt
        if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
                return;
  
 -      r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
 +      r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
        if (!WARN_ON(r)) {
 +              amdgpu_vram_mgr_set_cleared(bo->resource);
                amdgpu_bo_fence(abo, fence, false);
                dma_fence_put(fence);
        }
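The amdgpu_object.c hunks above change the buffer lifecycle: AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE is now set unconditionally rather than only when RAS is enabled, the release-time wipe fills with 0 instead of AMDGPU_POISON, and amdgpu_vram_mgr_set_cleared() records that the freed blocks are zeroed so a later allocation can skip its own clear. A toy userspace model of that handshake; all toy_* names are illustrative, and the cleared bit stands in for the per-block state the buddy allocator actually tracks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy resource: a chunk of "VRAM" plus a cleared bit. */
struct toy_resource {
	uint8_t mem[64];
	bool cleared;
};

/* Release path: always wipe (zero-fill rather than poison) and remember
 * that the blocks are now zeroed. */
static void toy_release(struct toy_resource *res)
{
	memset(res->mem, 0, sizeof(res->mem));
	res->cleared = true;
}

/* Allocation path for a buffer that wants zeroed memory: if the blocks
 * are already cleared, the fill can be skipped entirely. */
static void toy_alloc_cleared(struct toy_resource *res)
{
	if (res->cleared) {
		printf("reusing pre-cleared blocks, no fill needed\n");
	} else {
		memset(res->mem, 0, sizeof(res->mem));
		printf("had to clear on allocation\n");
	}
	res->cleared = false;	/* the new owner will dirty it */
}

int main(void)
{
	struct toy_resource res = { .cleared = false };

	toy_alloc_cleared(&res);	/* first use: must clear */
	toy_release(&res);		/* wipe on release, mark cleared */
	toy_alloc_cleared(&res);	/* second use: the clear is free */
	return 0;
}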
index 6b48bcf53ce96045794094d90cd5365d0b259858,7805ea4d82f2a6bce65754254e3c3fb8f44fc267..3749892bf70226fbb6d34c06bce2dbf1f02bd949
@@@ -236,7 -236,7 +236,7 @@@ static int amdgpu_ttm_map_buffer(struc
        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
        dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
-                               dst_addr, num_bytes, false);
+                               dst_addr, num_bytes, 0);
  
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
@@@ -296,6 -296,8 +296,8 @@@ int amdgpu_ttm_copy_mem_to_mem(struct a
        struct dma_fence *fence = NULL;
        int r = 0;
  
+       uint32_t copy_flags = 0;
        if (!adev->mman.buffer_funcs_enabled) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
                if (r)
                        goto error;
  
-               r = amdgpu_copy_buffer(ring, from, to, cur_size,
-                                      resv, &next, false, true, tmz);
+               if (tmz)
+                       copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+               r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+                                      &next, false, true, copy_flags);
                if (r)
                        goto error;
  
@@@ -378,12 -383,11 +383,12 @@@ static int amdgpu_move_blit(struct ttm_
            (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
                struct dma_fence *wipe_fence = NULL;
  
 -              r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
 -                                      false);
 +              r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
 +                                     false);
                if (r) {
                        goto error;
                } else if (wipe_fence) {
 +                      amdgpu_vram_mgr_set_cleared(bo->resource);
                        dma_fence_put(fence);
                        fence = wipe_fence;
                }
@@@ -1489,7 -1493,7 +1494,7 @@@ static int amdgpu_ttm_access_memory_sdm
                swap(src_addr, dst_addr);
  
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
-                               PAGE_SIZE, false);
+                               PAGE_SIZE, 0);
  
        amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
@@@ -2140,7 -2144,7 +2145,7 @@@ int amdgpu_copy_buffer(struct amdgpu_ri
                       uint64_t dst_offset, uint32_t byte_count,
                       struct dma_resv *resv,
                       struct dma_fence **fence, bool direct_submit,
-                      bool vm_needs_flush, bool tmz)
+                      bool vm_needs_flush, uint32_t copy_flags)
  {
        struct amdgpu_device *adev = ring->adev;
        unsigned int num_loops, num_dw;
                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
  
                amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
-                                       dst_offset, cur_size_in_bytes, tmz);
+                                       dst_offset, cur_size_in_bytes, copy_flags);
                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
                byte_count -= cur_size_in_bytes;
@@@ -2227,71 -2230,6 +2231,71 @@@ static int amdgpu_ttm_fill_mem(struct a
        return 0;
  }
  
 +/**
 + * amdgpu_ttm_clear_buffer - clear memory buffers
 + * @bo: amdgpu buffer object
 + * @resv: reservation object
 + * @fence: dma_fence associated with the operation
 + *
 + * Clear the memory buffer resource.
 + *
 + * Returns:
 + * 0 for success or a negative error code on failure.
 + */
 +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 +                          struct dma_resv *resv,
 +                          struct dma_fence **fence)
 +{
 +      struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 +      struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 +      struct amdgpu_res_cursor cursor;
 +      u64 addr;
 +      int r;
 +
 +      if (!adev->mman.buffer_funcs_enabled)
 +              return -EINVAL;
 +
 +      if (!fence)
 +              return -EINVAL;
 +
 +      *fence = dma_fence_get_stub();
 +
 +      amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
 +
 +      mutex_lock(&adev->mman.gtt_window_lock);
 +      while (cursor.remaining) {
 +              struct dma_fence *next = NULL;
 +              u64 size;
 +
 +              if (amdgpu_res_cleared(&cursor)) {
 +                      amdgpu_res_next(&cursor, cursor.size);
 +                      continue;
 +              }
 +
 +              /* Never clear more than 256MiB at once to avoid timeouts */
 +              size = min(cursor.size, 256ULL << 20);
 +
 +              r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
 +                                        1, ring, false, &size, &addr);
 +              if (r)
 +                      goto err;
 +
 +              r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
 +                                      &next, true, true);
 +              if (r)
 +                      goto err;
 +
 +              dma_fence_put(*fence);
 +              *fence = next;
 +
 +              amdgpu_res_next(&cursor, size);
 +      }
 +err:
 +      mutex_unlock(&adev->mman.gtt_window_lock);
 +
 +      return r;
 +}
 +
  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,
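The new amdgpu_ttm_clear_buffer() above walks the resource with a cursor, skips ranges that amdgpu_res_cleared() reports as already zeroed, caps each fill at 256 MiB to avoid job timeouts, and chains the resulting fences. A simplified userspace model of that loop; the toy_* types stand in for the cursor and resource, and the print statements replace the actual fill jobs:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CHUNK_MAX	(256ULL << 20)	/* never clear more than 256 MiB at once */

struct toy_range {
	uint64_t size;
	bool cleared;	/* stand-in for amdgpu_res_cleared() */
};

/* Skip ranges already known to be zero; split the rest into bounded
 * chunks so no single fill job can run into a timeout. */
static void toy_clear(const struct toy_range *ranges, int n)
{
	for (int i = 0; i < n; i++) {
		if (ranges[i].cleared) {
			printf("range %d: %llu bytes already cleared, skip\n",
			       i, (unsigned long long)ranges[i].size);
			continue;
		}
		for (uint64_t off = 0; off < ranges[i].size; off += CHUNK_MAX) {
			uint64_t sz = ranges[i].size - off;

			if (sz > CHUNK_MAX)
				sz = CHUNK_MAX;
			printf("range %d: fill %llu bytes at +%llu\n", i,
			       (unsigned long long)sz,
			       (unsigned long long)off);
		}
	}
}

int main(void)
{
	struct toy_range ranges[] = {
		{ .size = 768ULL << 20, .cleared = false },
		{ .size = 128ULL << 20, .cleared = true },
	};

	toy_clear(ranges, 2);
	return 0;
}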
index 4f5e70ee9ad0de3de1fb78825331b434195422e4,53d5a5990c3110d6c1e7bd50ad3a6d3e77dd60d4..b6f53129dea3011f4cf74d4f632d35ef6a8d988e
@@@ -38,6 -38,8 +38,6 @@@
  #define AMDGPU_GTT_MAX_TRANSFER_SIZE  512
  #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS       2
  
 -#define AMDGPU_POISON 0xd0bed0be
 -
  extern const struct attribute_group amdgpu_vram_mgr_attr_group;
  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
  
@@@ -109,6 -111,8 +109,8 @@@ struct amdgpu_copy_mem 
        unsigned long                   offset;
  };
  
+ #define AMDGPU_COPY_FLAGS_TMZ         (1 << 0)
  int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
  void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
  int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
@@@ -149,16 -153,13 +151,16 @@@ int amdgpu_copy_buffer(struct amdgpu_ri
                       uint64_t dst_offset, uint32_t byte_count,
                       struct dma_resv *resv,
                       struct dma_fence **fence, bool direct_submit,
-                      bool vm_needs_flush, bool tmz);
+                      bool vm_needs_flush, uint32_t copy_flags);
  int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                               const struct amdgpu_copy_mem *src,
                               const struct amdgpu_copy_mem *dst,
                               uint64_t size, bool tmz,
                               struct dma_resv *resv,
                               struct dma_fence **f);
 +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 +                          struct dma_resv *resv,
 +                          struct dma_fence **fence);
  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,
index e494f5bf136a1780894fc4be4eedf577da4a1ebd,f23002ed2b42ae50020e7e3b9bff44494633b131..6c30eceec896591337ee9458413a610db2a78f27
@@@ -469,7 -469,7 +469,7 @@@ static int amdgpu_vram_mgr_new(struct t
        if (tbo->type != ttm_bo_type_kernel)
                max_bytes -= AMDGPU_VM_RESERVED_VRAM;
  
-       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
                pages_per_block = ~0ul;
        } else {
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                /* default to 2MB */
                pages_per_block = 2UL << (20UL - PAGE_SHIFT);
  #endif
-               pages_per_block = max_t(uint32_t, pages_per_block,
+               pages_per_block = max_t(u32, pages_per_block,
                                        tbo->page_alignment);
        }
  
        if (place->flags & TTM_PL_FLAG_TOPDOWN)
                vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
  
-       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
                vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
  
 +      if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
 +              vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
 +
        if (fpfn || lpfn != mgr->mm.size)
                /* Allocate blocks in desired range */
                vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
                else
                        min_block_size = mgr->default_page_size;
  
-               BUG_ON(min_block_size < mm->chunk_size);
                /* Limit maximum size to 2GiB due to SG table limitations */
                size = min(remaining_size, 2ULL << 30);
  
                if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
-                               !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+                   !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
                        min_block_size = (u64)pages_per_block << PAGE_SHIFT;
  
+               BUG_ON(min_block_size < mm->chunk_size);
                r = drm_buddy_alloc_blocks(mm, fpfn,
                                           lpfn,
                                           size,
                                           min_block_size,
                                           &vres->blocks,
                                           vres->flags);
+               if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+                   !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+                       vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+                       pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+                                               tbo->page_alignment);
+                       continue;
+               }
                if (unlikely(r))
                        goto error_free_blocks;
  
        return 0;
  
  error_free_blocks:
 -      drm_buddy_free_list(mm, &vres->blocks);
 +      drm_buddy_free_list(mm, &vres->blocks, 0);
        mutex_unlock(&mgr->lock);
  error_fini:
        ttm_resource_fini(man, &vres->base);
@@@ -608,7 -615,7 +618,7 @@@ static void amdgpu_vram_mgr_del(struct 
  
        amdgpu_vram_mgr_do_reserve(man);
  
 -      drm_buddy_free_list(mm, &vres->blocks);
 +      drm_buddy_free_list(mm, &vres->blocks, vres->flags);
        mutex_unlock(&mgr->lock);
  
        atomic64_sub(vis_usage, &mgr->vis_usage);
@@@ -916,7 -923,7 +926,7 @@@ void amdgpu_vram_mgr_fini(struct amdgpu
                kfree(rsv);
  
        list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
 -              drm_buddy_free_list(&mgr->mm, &rsv->allocated);
 +              drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
                kfree(rsv);
        }
        if (!adev->gmc.is_app_apu)
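The amdgpu_vram_mgr.c hunks above turn AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS into a best-effort hint: the contiguous attempt is driven by the BO flag, and on -ENOSPC the allocator drops DRM_BUDDY_CONTIGUOUS_ALLOCATION and retries, as long as the placement itself (TTM_PL_FLAG_CONTIGUOUS, set e.g. on the pin path in the amdgpu_object.c hunk above) does not strictly require contiguity. A toy C model of that fallback; the toy_* names and the 64 MiB failure threshold are illustrative:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Pretend allocator: contiguous requests above a threshold fail with
 * -ENOSPC, mimicking a fragmented buddy heap. */
static int toy_buddy_alloc(uint64_t size, bool contiguous)
{
	if (contiguous && size > (64ULL << 20))
		return -ENOSPC;
	return 0;
}

/* Best-effort contiguous allocation: try contiguous first; if only the
 * BO flag (a hint) asked for it, retry fragmented instead of failing. */
static int toy_alloc(uint64_t size, bool bo_wants_contig,
		     bool place_requires_contig)
{
	bool contiguous = bo_wants_contig || place_requires_contig;
	int r = toy_buddy_alloc(size, contiguous);

	if (r == -ENOSPC && contiguous && !place_requires_contig) {
		printf("contiguous failed, retrying fragmented\n");
		r = toy_buddy_alloc(size, false);
	}
	return r;
}

int main(void)
{
	printf("result: %d\n", toy_alloc(256ULL << 20, true, false));
	return 0;
}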