#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
/**
* DOC: amdgpu_object
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
c++;
}
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (adev->ras_enabled)
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
+ true, false, 0);
}
/**
if (!bo->placements[i].lpfn ||
(lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+ r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
if (!WARN_ON(r)) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+
if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
- false);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
return 0;
}
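/*
 * Editor's sketch, not part of this patch: how a caller that previously
 * passed "tmz = true" to amdgpu_copy_buffer() is converted to the new
 * copy_flags bitmask. The ring, offsets, size and resv here are
 * placeholders; only AMDGPU_COPY_FLAGS_TMZ comes from this series.
 */
static int example_tmz_copy(struct amdgpu_ring *ring, uint64_t src_offset,
			    uint64_t dst_offset, uint32_t byte_count,
			    struct dma_resv *resv, struct dma_fence **fence)
{
	uint32_t copy_flags = 0;

	copy_flags |= AMDGPU_COPY_FLAGS_TMZ;	/* was: tmz = true */

	/* direct_submit = false, vm_needs_flush = true, as in the move path */
	return amdgpu_copy_buffer(ring, src_offset, dst_offset, byte_count,
				  resv, fence, false, true, copy_flags);
}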
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the backing memory of @bo, skipping any ranges that the
+ * allocator already reports as cleared.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
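/*
 * Editor's sketch, not part of this patch: minimal use of
 * amdgpu_ttm_clear_buffer(), mirroring the amdgpu_bo_create() path above.
 * Assumes the BO's reservation object is already locked by the caller.
 */
static int example_clear_vram_bo(struct amdgpu_bo *bo)
{
	struct dma_fence *fence = NULL;
	int r;

	r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
	if (r)
		return r;

	/* keep the clear ordered against later users of the BO */
	amdgpu_bo_fence(bo, fence, false);
	dma_fence_put(fence);

	return 0;
}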
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
-
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
+ bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_block = max_t(uint32_t, pages_per_block,
+ pages_per_block = max_t(u32, pages_per_block,
tbo->page_alignment);
}
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
else
min_block_size = mgr->default_page_size;
- BUG_ON(min_block_size < mm->chunk_size);
-
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
- !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+ BUG_ON(min_block_size < mm->chunk_size);
+
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
&vres->blocks,
vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
+
if (unlikely(r))
goto error_free_blocks;
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
amdgpu_vram_mgr_do_reserve(man);
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
mutex_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)