X-Git-Url: https://repo.jachan.dev/linux.git/blobdiff_plain/0a2a1330d2621c7f963d9f55bb094811cc1c06b9..190b10367b0d311f68dc71e40b254fd4427affc2:/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8ee69652be8c..ea25164e7f4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,63 +37,12 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-
-
-static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
-				    struct ttm_mem_reg *mem)
-{
-	if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
-		return 0;
-
-	return ((mem->start << PAGE_SHIFT) + mem->size) >
-		adev->mc.visible_vram_size ?
-		adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
-		mem->size;
-}
-
-static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
-				       struct ttm_mem_reg *old_mem,
-				       struct ttm_mem_reg *new_mem)
-{
-	u64 vis_size;
-	if (!adev)
-		return;
-
-	if (new_mem) {
-		switch (new_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_add(new_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_add(new_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, new_mem);
-			atomic64_add(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-
-	if (old_mem) {
-		switch (old_mem->mem_type) {
-		case TTM_PL_TT:
-			atomic64_sub(old_mem->size, &adev->gtt_usage);
-			break;
-		case TTM_PL_VRAM:
-			atomic64_sub(old_mem->size, &adev->vram_usage);
-			vis_size = amdgpu_get_vis_part_size(adev, old_mem);
-			atomic64_sub(vis_size, &adev->vram_vis_usage);
-			break;
-		}
-	}
-}
-
 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-	struct amdgpu_bo *bo;
-
-	bo = container_of(tbo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 
-	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
+	amdgpu_bo_kunmap(bo);
 
 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
@@ -113,11 +62,12 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
-				      struct ttm_placement *placement,
-				      struct ttm_place *places,
-				      u32 domain, u64 flags)
+void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct ttm_placement *placement = &abo->placement;
+	struct ttm_place *places = abo->placements;
+	u64 flags = abo->flags;
 	u32 c = 0;
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
@@ -140,7 +90,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
-		places[c].lpfn = 0;
+		if (flags & AMDGPU_GEM_CREATE_SHADOW)
+			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+		else
+			places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
@@ -197,29 +150,8 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 	placement->busy_placement = places;
 }
 
-void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
-{
-	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-
-	amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
-				  domain, abo->flags);
-}
-
-static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
-					struct ttm_placement *placement)
-{
-	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));
-
-	memcpy(bo->placements, placement->placement,
-	       placement->num_placement * sizeof(struct ttm_place));
-	bo->placement.num_placement = placement->num_placement;
-	bo->placement.num_busy_placement = placement->num_busy_placement;
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-}
-
 /**
- * amdgpu_bo_create_kernel - create BO for kernel use
+ * amdgpu_bo_create_reserved - create reserved BO for kernel use
  *
  * @adev: amdgpu device object
  * @size: size for the new BO
@@ -229,24 +161,30 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
  * @gpu_addr: GPU addr of the pinned BO
  * @cpu_addr: optional CPU address mapping
  *
- * Allocates and pins a BO for kernel internal use.
+ * Allocates and pins a BO for kernel internal use, and returns it still
+ * reserved.
  *
  * Returns 0 on success, negative error code otherwise.
  */
-int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
-			    unsigned long size, int align,
-			    u32 domain, struct amdgpu_bo **bo_ptr,
-			    u64 *gpu_addr, void **cpu_addr)
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+			      unsigned long size, int align,
+			      u32 domain, struct amdgpu_bo **bo_ptr,
+			      u64 *gpu_addr, void **cpu_addr)
 {
+	bool free = false;
 	int r;
 
-	r = amdgpu_bo_create(adev, size, align, true, domain,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, bo_ptr);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
-		return r;
+	if (!*bo_ptr) {
+		r = amdgpu_bo_create(adev, size, align, true, domain,
+				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+				     NULL, NULL, 0, bo_ptr);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
+				r);
+			return r;
+		}
+		free = true;
 	}
 
 	r = amdgpu_bo_reserve(*bo_ptr, false);
@@ -269,19 +207,51 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 		}
 	}
 
-	amdgpu_bo_unreserve(*bo_ptr);
-
 	return 0;
 
 error_unreserve:
 	amdgpu_bo_unreserve(*bo_ptr);
 
 error_free:
-	amdgpu_bo_unref(bo_ptr);
+	if (free)
+		amdgpu_bo_unref(bo_ptr);
 
 	return r;
 }
 
+/**
+ * amdgpu_bo_create_kernel - create BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: resulting BO
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+			    unsigned long size, int align,
+			    u32 domain, struct amdgpu_bo **bo_ptr,
+			    u64 *gpu_addr, void **cpu_addr)
+{
+	int r;
+
+	r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
+				      gpu_addr, cpu_addr);
+
+	if (r)
+		return r;
+
+	amdgpu_bo_unreserve(*bo_ptr);
+
+	return 0;
+}
+
 /**
  * amdgpu_bo_free_kernel - free BO for kernel use
  *
@@ -311,18 +281,18 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 		*cpu_addr = NULL;
 }
 
-int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
-				unsigned long size, int byte_align,
-				bool kernel, u32 domain, u64 flags,
-				struct sg_table *sg,
-				struct ttm_placement *placement,
-				struct reservation_object *resv,
-				struct amdgpu_bo **bo_ptr)
+static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+			       unsigned long size, int byte_align,
+			       bool kernel, u32 domain, u64 flags,
+			       struct sg_table *sg,
+			       struct reservation_object *resv,
+			       uint64_t init_value,
+			       struct amdgpu_bo **bo_ptr)
 {
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	size_t acc_size;
 	int r;
 
@@ -351,13 +321,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	}
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
-	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
+	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
 					 AMDGPU_GEM_DOMAIN_GTT |
 					 AMDGPU_GEM_DOMAIN_CPU |
 					 AMDGPU_GEM_DOMAIN_GDS |
 					 AMDGPU_GEM_DOMAIN_GWS |
 					 AMDGPU_GEM_DOMAIN_OA);
-	bo->allowed_domains = bo->prefered_domains;
+	bo->allowed_domains = bo->preferred_domains;
 	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
@@ -391,19 +361,26 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 #endif
 
-	amdgpu_fill_placement_to_bo(bo, placement);
-	/* Kernel allocation are uninterruptible */
+	bo->tbo.bdev = &adev->mman.bdev;
+	amdgpu_ttm_placement_from_domain(bo, domain);
 
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+	/* Kernel allocation are uninterruptible */
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, !kernel, NULL,
 				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	amdgpu_cs_report_moved_bytes(adev,
-		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
-
 	if (unlikely(r != 0))
 		return r;
 
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+	else
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
+
 	if (kernel)
 		bo->tbo.priority = 1;
 
@@ -411,7 +388,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+		r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;
 
@@ -426,6 +403,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 
 	trace_amdgpu_bo_create(bo);
 
+	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
+	if (type == ttm_bo_type_device)
+		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
 	return 0;
 
 fail_unreserve:
@@ -439,27 +420,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 				   unsigned long size, int byte_align,
 				   struct amdgpu_bo *bo)
 {
-	struct ttm_placement placement = {0};
-	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 	int r;
 
 	if (bo->shadow)
 		return 0;
 
-	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, AMDGPU_GEM_DOMAIN_GTT,
-				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);
-
-	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
-					AMDGPU_GEM_DOMAIN_GTT,
-					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
-					NULL, &placement,
-					bo->tbo.resv,
-					&bo->shadow);
+	r = amdgpu_bo_do_create(adev, size, byte_align, true,
+				AMDGPU_GEM_DOMAIN_GTT,
+				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+				AMDGPU_GEM_CREATE_SHADOW,
+				NULL, bo->tbo.resv, 0,
+				&bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
@@ -470,39 +441,34 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	return r;
 }
 
+/* init_value will only take effect when flags contains
+ * AMDGPU_GEM_CREATE_VRAM_CLEARED.
+ */
 int amdgpu_bo_create(struct amdgpu_device *adev,
 		     unsigned long size, int byte_align,
 		     bool kernel, u32 domain, u64 flags,
 		     struct sg_table *sg,
 		     struct reservation_object *resv,
+		     uint64_t init_value,
 		     struct amdgpu_bo **bo_ptr)
 {
-	struct ttm_placement placement = {0};
-	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	int r;
 
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, domain, flags);
-
-	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
-					domain, flags, sg, &placement,
-					resv, bo_ptr);
+	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
+				parent_flags, sg, resv, init_value, bo_ptr);
 	if (r)
 		return r;
 
-	if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
-		if (!resv) {
-			r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
-			WARN_ON(r != 0);
-		}
+	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
+		if (!resv)
+			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+							NULL));
 
 		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
 
 		if (!resv)
-			ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
+			reservation_object_unlock((*bo_ptr)->tbo.resv);
 
 		if (r)
 			amdgpu_bo_unref(bo_ptr);
@@ -535,7 +501,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 
 	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
 			       amdgpu_bo_size(bo), resv, fence,
-			       direct);
+			       direct, false);
 	if (!r)
 		amdgpu_bo_fence(bo, *fence, true);
 
@@ -551,7 +517,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
 	if (bo->pin_count)
 		return 0;
 
-	domain = bo->prefered_domains;
+	domain = bo->preferred_domains;
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
@@ -588,7 +554,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 
 	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
 			       amdgpu_bo_size(bo), resv, fence,
-			       direct);
+			       direct, false);
 	if (!r)
 		amdgpu_bo_fence(bo, *fence, true);
 
@@ -598,16 +564,16 @@ err:
 
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 {
-	bool is_iomem;
+	void *kptr;
 	long r;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 		return -EPERM;
 
-	if (bo->kptr) {
-		if (ptr) {
-			*ptr = bo->kptr;
-		}
+	kptr = amdgpu_bo_kptr(bo);
+	if (kptr) {
+		if (ptr)
+			*ptr = kptr;
 		return 0;
 	}
 
@@ -620,19 +586,23 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 	if (r)
 		return r;
 
-	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
 	if (ptr)
-		*ptr = bo->kptr;
+		*ptr = amdgpu_bo_kptr(bo);
 
 	return 0;
 }
 
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
+{
+	bool is_iomem;
+
+	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+}
+
 void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
 {
-	if (bo->kptr == NULL)
-		return;
-	bo->kptr = NULL;
-	ttm_bo_kunmap(&bo->kmap);
+	if (bo->kmap.bo)
+		ttm_bo_kunmap(&bo->kmap);
 }
 
 struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
@@ -663,7 +633,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r, i;
-	unsigned fpfn, lpfn;
 
 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return -EPERM;
@@ -695,22 +664,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	}
 
 	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	/* force to pin into visible video ram */
+	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
+		bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	for (i = 0; i < bo->placement.num_placement; i++) {
-		/* force to pin into visible video ram */
-		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
-		    (!max_offset || max_offset >
-		     adev->mc.visible_vram_size)) {
-			if (WARN_ON_ONCE(min_offset >
-					 adev->mc.visible_vram_size))
-				return -EINVAL;
-			fpfn = min_offset >> PAGE_SHIFT;
-			lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		} else {
-			fpfn = min_offset >> PAGE_SHIFT;
-			lpfn = max_offset >> PAGE_SHIFT;
-		}
+		unsigned fpfn, lpfn;
+
+		fpfn = min_offset >> PAGE_SHIFT;
+		lpfn = max_offset >> PAGE_SHIFT;
+
 		if (fpfn > bo->placements[i].fpfn)
 			bo->placements[i].fpfn = fpfn;
 		if (!bo->placements[i].lpfn ||
@@ -724,15 +687,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		dev_err(adev->dev, "%p pin failed\n", bo);
 		goto error;
 	}
-	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
-	if (unlikely(r)) {
-		dev_err(adev->dev, "%p bind failed\n", bo);
-		goto error;
-	}
 
 	bo->pin_count = 1;
-	if (gpu_addr != NULL)
+	if (gpu_addr != NULL) {
+		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
+		if (unlikely(r)) {
+			dev_err(adev->dev, "%p bind failed\n", bo);
+			goto error;
+		}
 		*gpu_addr = amdgpu_bo_gpu_offset(bo);
+	}
 	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
 		adev->vram_pin_size += amdgpu_bo_size(bo);
 		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
@@ -918,8 +882,10 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
 
-	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(adev, abo);
+	abo = ttm_to_amdgpu_bo(bo);
+	amdgpu_vm_bo_invalidate(adev, abo, evict);
+
+	amdgpu_bo_kunmap(abo);
 
 	/* remember the eviction */
 	if (evict)
@@ -930,8 +896,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;
 
 	/* move_notify is called before move happens */
-	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
-
 	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }
 
@@ -939,19 +903,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	unsigned long offset, size, lpfn;
-	int i, r;
+	unsigned long offset, size;
+	int r;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return 0;
 
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
+
+	/* Remember that this BO was accessed by the CPU */
+	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return 0;
 
 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
-	/* TODO: figure out how to map scattered VRAM to the CPU */
 	if ((offset + size) <= adev->mc.visible_vram_size)
 		return 0;
 
@@ -961,26 +928,21 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 	/* hurrah the memory is not visible ! */
 	atomic64_inc(&adev->num_vram_cpu_page_faults);
-	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
-	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
-	for (i = 0; i < abo->placement.num_placement; i++) {
-		/* Force into visible VRAM */
-		if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    (!abo->placements[i].lpfn ||
-		     abo->placements[i].lpfn > lpfn))
-			abo->placements[i].lpfn = lpfn;
-	}
+	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+					 AMDGPU_GEM_DOMAIN_GTT);
+
+	/* Avoid costly evictions; only set GTT as a busy placement */
+	abo->placement.num_busy_placement = 1;
+	abo->placement.busy_placement = &abo->placements[1];
+
 	r = ttm_bo_validate(bo, &abo->placement, false, false);
-	if (unlikely(r == -ENOMEM)) {
-		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
-		return ttm_bo_validate(bo, &abo->placement, false, false);
-	} else if (unlikely(r != 0)) {
+	if (unlikely(r != 0))
 		return r;
-	}
 
 	offset = bo->mem.start << PAGE_SHIFT;
 	/* this should never happen */
-	if ((offset + size) > adev->mc.visible_vram_size)
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    (offset + size) > adev->mc.visible_vram_size)
 		return -EINVAL;
 
 	return 0;
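
A minimal usage sketch, not part of the patch above: the hunks split the old amdgpu_bo_create_kernel() into amdgpu_bo_create_reserved(), which hands the pinned and optionally CPU-mapped buffer back still reserved, plus a thin wrapper that only drops the reservation again. Under that reading, a kernel-internal caller of the reworked helpers might look like the code below; the example_* function names, the 4096-byte size and the PAGE_SIZE alignment are assumptions made purely for illustration, while amdgpu_bo_create_kernel(), amdgpu_bo_free_kernel() and AMDGPU_GEM_DOMAIN_VRAM are taken from the diff itself.

#include "amdgpu.h"

/* Hypothetical caller: allocate a small, pinned, CPU-mapped VRAM scratch
 * buffer.  *bo is expected to be NULL on entry; if it is not,
 * amdgpu_bo_create_reserved() reuses the existing BO instead of allocating
 * a new one (see the "if (!*bo_ptr)" hunk above).
 */
static int example_alloc_scratch(struct amdgpu_device *adev,
				 struct amdgpu_bo **bo,
				 u64 *gpu_addr, void **cpu_addr)
{
	int r;

	/* Creates, pins and maps the BO, then drops the reservation. */
	r = amdgpu_bo_create_kernel(adev, 4096, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    bo, gpu_addr, cpu_addr);
	if (r)
		return r;

	/* The buffer is now ready for CPU writes through *cpu_addr. */
	memset(*cpu_addr, 0, 4096);
	return 0;
}

/* Hypothetical caller: tear the scratch buffer down again; the helper
 * unpins, unmaps and frees the BO, clearing *cpu_addr as the hunk context
 * above shows.
 */
static void example_free_scratch(struct amdgpu_bo **bo,
				 u64 *gpu_addr, void **cpu_addr)
{
	amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr);
}

Callers that need to fill the buffer before anyone else can touch it would instead use amdgpu_bo_create_reserved() directly and call amdgpu_bo_unreserve() themselves once the contents are in place, which is the point of splitting the helper in this patch.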