#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
/**
* DOC: amdgpu_object
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
c++;
}
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (adev->ras_enabled)
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
+ true, false, 0);
}
/**
if (!bo->placements[i].lpfn ||
(lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+ r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
if (!WARN_ON(r)) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+
if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
- false);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
return 0;
}
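/*
 * Editor's sketch, not part of this patch: how a caller that previously
 * passed "tmz = true" to amdgpu_copy_buffer() is converted to the new
 * copy_flags bitmask. The ring, offsets, size and resv here are
 * placeholders; only AMDGPU_COPY_FLAGS_TMZ comes from this series.
 */
static int example_tmz_copy(struct amdgpu_ring *ring, uint64_t src_offset,
			    uint64_t dst_offset, uint32_t byte_count,
			    struct dma_resv *resv, struct dma_fence **fence)
{
	uint32_t copy_flags = 0;

	copy_flags |= AMDGPU_COPY_FLAGS_TMZ;	/* was: tmz = true */

	/* direct_submit = false, vm_needs_flush = true, as in the move path */
	return amdgpu_copy_buffer(ring, src_offset, dst_offset, byte_count,
				  resv, fence, false, true, copy_flags);
}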
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the backing memory of @bo, skipping any ranges that the
+ * allocator already reports as cleared.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
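/*
 * Editor's sketch, not part of this patch: minimal use of
 * amdgpu_ttm_clear_buffer(), mirroring the amdgpu_bo_create() path above.
 * Assumes the BO's reservation object is already locked by the caller.
 */
static int example_clear_vram_bo(struct amdgpu_bo *bo)
{
	struct dma_fence *fence = NULL;
	int r;

	r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
	if (r)
		return r;

	/* keep the clear ordered against later users of the BO */
	amdgpu_bo_fence(bo, fence, false);
	dma_fence_put(fence);

	return 0;
}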
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
-
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
+ bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_block = max_t(uint32_t, pages_per_block,
+ pages_per_block = max_t(u32, pages_per_block,
tbo->page_alignment);
}
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
else
min_block_size = mgr->default_page_size;
- BUG_ON(min_block_size < mm->chunk_size);
-
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
- !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+ BUG_ON(min_block_size < mm->chunk_size);
+
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
&vres->blocks,
vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
+
if (unlikely(r))
goto error_free_blocks;
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
amdgpu_vram_mgr_do_reserve(man);
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
mutex_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)