(REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
(REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
-static char *ivpu_platform_to_str(u32 platform)
-{
- switch (platform) {
- case IVPU_PLATFORM_SILICON:
- return "IVPU_PLATFORM_SILICON";
- case IVPU_PLATFORM_SIMICS:
- return "IVPU_PLATFORM_SIMICS";
- case IVPU_PLATFORM_FPGA:
- return "IVPU_PLATFORM_FPGA";
- default:
- return "Invalid platform";
- }
-}
-
-static void ivpu_hw_read_platform(struct ivpu_device *vdev)
-{
- u32 gen_ctrl = REGV_RD32(VPU_37XX_HOST_SS_GEN_CTRL);
- u32 platform = REG_GET_FLD(VPU_37XX_HOST_SS_GEN_CTRL, PS, gen_ctrl);
-
- if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA)
- vdev->platform = platform;
- else
- vdev->platform = IVPU_PLATFORM_SILICON;
-
- ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n",
- ivpu_platform_to_str(vdev->platform), vdev->platform);
-}
-
static void ivpu_hw_wa_init(struct ivpu_device *vdev)
{
- vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
+ vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
vdev->wa.d3hot_after_power_off = true;
if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4)
vdev->wa.interrupt_clear_with_0 = true;
+
+ IVPU_PRINT_WA(punit_disabled);
+ IVPU_PRINT_WA(clear_runtime_mem);
+ IVPU_PRINT_WA(d3hot_after_power_off);
+ IVPU_PRINT_WA(interrupt_clear_with_0);
}
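
The IVPU_PRINT_WA() lines log which workarounds are active on the probed device. As a rough sketch of what such a macro pair can look like (the exact bodies live in ivpu_drv.h and the message text here is an assumption):

#define IVPU_WA(wa_name)	(vdev->wa.wa_name)
/* Hypothetical shape of the print helper; wording may differ from the driver. */
#define IVPU_PRINT_WA(wa_name) do {					  \
		if (IVPU_WA(wa_name))					  \
			ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \
	} while (0)
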
static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
{
- if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) {
- vdev->timeout.boot = 100000;
- vdev->timeout.jsm = 50000;
- vdev->timeout.tdr = 2000000;
- vdev->timeout.reschedule_suspend = 1000;
- } else {
- vdev->timeout.boot = 1000;
- vdev->timeout.jsm = 500;
- vdev->timeout.tdr = 2000;
- vdev->timeout.reschedule_suspend = 10;
- }
+ vdev->timeout.boot = 1000;
+ vdev->timeout.jsm = 500;
+ vdev->timeout.tdr = 2000;
+ vdev->timeout.reschedule_suspend = 10;
+ vdev->timeout.autosuspend = 10;
}
static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
int ret;
if (IVPU_WA(punit_disabled)) {
- ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n",
- ivpu_platform_to_str(vdev->platform));
+ ivpu_dbg(vdev, PM, "Skipping PLL request\n");
return 0;
}
static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
{
- u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
- if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
return -EIO;
return 0;
static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
{
- u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN);
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN);
- if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
return -EIO;
return 0;
static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
{
- u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY);
+ u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY);
- if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
- !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
+ if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
+ !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
return -EIO;
return 0;
int ret;
u32 val;
- val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+ val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
if (enable) {
- val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
} else {
- val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
- val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+ val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+ val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
}
- REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val);
+ REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val);
ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
if (ret) {
static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
{
- /* FPGA model (UPF) is not power aware, skipped Power Island polling */
- if (ivpu_is_fpga(vdev))
- return 0;
-
return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU,
exp_val, PWR_ISLAND_STATUS_TIMEOUT_US);
}
{
u32 val;
- val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
- val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
+ val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
+ val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
- val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
- REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
- val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
- REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+ val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
- val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
- REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+ REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
val = vdev->fw->entry_point >> 9;
REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G);
ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G);
+ vdev->platform = IVPU_PLATFORM_SILICON;
+ ivpu_hw_wa_init(vdev);
+ ivpu_hw_timeouts_init(vdev);
+
return 0;
}
{
int ret;
- ivpu_hw_read_platform(vdev);
- ivpu_hw_wa_init(vdev);
- ivpu_hw_timeouts_init(vdev);
-
ret = ivpu_hw_37xx_reset(vdev);
if (ret)
ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
u32 val;
/* Enable writing and set non-zero WDT value */
- REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
/* Enable writing and disable watchdog timer */
- REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
- REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0);
/* Now clear the timeout interrupt */
- val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG);
- val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
- REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val);
+ val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG);
+ val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
}
static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id)
{
- u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0;
- u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET);
+ u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
+ u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);
- REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+ REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
}
static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
{
- REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+ REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
}
static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev)
if (status == 0)
return 0;
- /* Disable global interrupt before handling local buttress interrupts */
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
-
if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
REGB_RD32(VPU_37XX_BUTTRESS_CURRENT_PLL));
else
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);
- /* Re-enable global interrupt */
- REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
-
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
struct ivpu_device *vdev = ptr;
u32 ret_irqv, ret_irqb;
+ REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
+
ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+ /* Re-enable global interrupts to re-trigger MSI for pending interrupts */
+ REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
+
return IRQ_RETVAL(ret_irqb | ret_irqv);
}
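
Hoisting the GLOBAL_INT_MASK writes from the buttress handler into the top-level ISR gates the MSI output for the whole duration of interrupt handling; clearing the mask at the end produces a fresh MSI edge for any source that latched in the meantime, so nothing is lost. A minimal stand-alone model of that pattern (toy names, not the driver's registers):

#include <stdbool.h>

struct toy_dev {
	volatile unsigned int global_int_mask;	/* 1 = MSI output gated */
};

static bool service_vpu_sources(struct toy_dev *d)      { (void)d; return true; }
static bool service_buttress_sources(struct toy_dev *d) { (void)d; return true; }

static bool toy_isr(struct toy_dev *d)
{
	bool handled_v, handled_b;

	d->global_int_mask = 1;		/* mask: no new edges while we handle */
	handled_v = service_vpu_sources(d);
	handled_b = service_buttress_sources(d);
	d->global_int_mask = 0;		/* unmask: pending sources re-trigger MSI */

	return handled_v || handled_b;
}
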
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
/*
* Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
return;
si_meminfo(&si);
- mem = si.freeram - si.freehigh;
+ mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
return ret;
}
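
For reference, the new system-memory-limit arithmetic above keeps roughly 63/64 of low memory and then sets aside AMDGPU_RESERVE_MEM_LIMIT (3UL << 29, i.e. 1.5 GiB) for the rest of the kernel, halving instead on machines too small for that reserve. A stand-alone restatement of the formula (not driver code):

#include <stdint.h>
#include <stdio.h>

#define RESERVE_MEM_LIMIT (3ULL << 29)	/* mirrors AMDGPU_RESERVE_MEM_LIMIT: 1.5 GiB */

static uint64_t kfd_system_mem_limit(uint64_t lowmem_bytes)
{
	uint64_t limit = lowmem_bytes - (lowmem_bytes >> 6);	/* keep ~98.4% */

	if (limit < 2 * RESERVE_MEM_LIMIT)
		limit >>= 1;			/* tiny systems: just use half */
	else
		limit -= RESERVE_MEM_LIMIT;	/* leave 1.5 GiB for the rest of the kernel */
	return limit;
}

int main(void)
{
	/* 16 GiB of low memory -> 15.75 GiB - 1.5 GiB = 14.25 GiB (14592 MiB) */
	printf("%llu MiB\n", (unsigned long long)(kfd_system_mem_limit(16ULL << 30) >> 20));
	return 0;
}
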
+static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
+
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
enum dma_data_direction dir;
if (unlikely(!ttm->sg)) {
- pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+ pr_debug("SG Table of BO is NULL");
return;
}
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
- same_hive) {
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
- * local hive, or userptr mapping can reuse dma map
+ * local hive, or userptr, or GTT mapping can reuse dma map
* address space share the original BO
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
- attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
continue;
if (attachment[i]->bo_va) {
amdgpu_bo_reserve(bo[i], true);
- amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
- amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
if (unlikely(ret))
goto error;
- ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(ret))
goto error;
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
amdgpu_sync_fence(sync, bo_va->last_pt_update);
-
- kfd_mem_dmaunmap_attachment(mem, entry);
}
static int update_gpuvm_pte(struct kgd_mem *mem,
update_gpuvm_pte_failed:
unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
return ret;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
}
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
allocate_init_user_pages_failed:
err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
- list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
kfd_mem_detach(entry);
+ }
ret = unreserve_bo_and_vms(&ctx, false, false);
if (unlikely(ret))
goto out_unreserve;
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto out_unreserve;
- }
- }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- dma_resv_add_fence(bo->tbo.base.resv,
- &avm->process_info->eviction_fence->base,
- DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
return ret;
}
+void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm == vm)
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ mutex_unlock(&mem->lock);
+}
+
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
mem->mapped_to_gpu_memory);
}
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
-
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
return 0;
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
err_free_mem:
kfree(*mem);
err_put_obj:
}
amdgpu_sync_create(&p->sync);
- drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES);
return 0;
}
return r;
}
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
bo_va = e->bo_va;
if (bo_va == NULL)
return r;
}
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
if (r)
return r;
job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
}
- if (amdgpu_vm_debug) {
+ if (adev->debug_vm) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->bo;
r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
- if (printk_ratelimit())
- DRM_ERROR("Failed to initialize parser %d!\n", r);
+ DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r);
return r;
}
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
[AMDGPU_HW_IP_VCN_JPEG] = 1,
+ [AMDGPU_HW_IP_VPE] = 1,
};
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
return true;
default:
case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
return false;
}
}
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
int32_t priority)
{
- if (!amdgpu_ctx_priority_is_valid(priority))
- return -EINVAL;
-
/* NORMAL and below are accessible by everyone */
if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
return 0;
return 0;
}
-
-
static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv, uint32_t id,
bool set, u32 *stable_pstate)
id = args->in.ctx_id;
priority = args->in.priority;
- /* For backwards compatibility reasons, we need to accept
- * ioctls with garbage in the priority field */
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field result
+ * in the priority being set to NORMAL.
+ */
if (!amdgpu_ctx_priority_is_valid(priority))
priority = AMDGPU_CTX_PRIORITY_NORMAL;
struct drm_dp_mst_branch *found_mstb;
struct drm_dp_mst_port *port;
+ if (!mstb)
+ return NULL;
+
if (memcmp(mstb->guid, guid, 16) == 0)
return mstb;
list_for_each_entry(port, &mstb->ports, next) {
- if (!port->mstb)
- continue;
-
found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid);
if (found_mstb)
}
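
Moving the NULL check to the top of get_mst_branch_device_by_guid_helper() also covers the initial call with a NULL mstb and lets the recursion skip ports with no downstream branch naturally. The shape of that pattern in a self-contained sketch (made-up types, not the DRM structures):

#include <stddef.h>
#include <string.h>

struct node {
	unsigned char guid[16];
	struct node *children[4];	/* entries may be NULL */
	int nchildren;
};

static struct node *find_by_guid(struct node *n, const unsigned char guid[16])
{
	if (!n)				/* handles both the root call and NULL children */
		return NULL;
	if (!memcmp(n->guid, guid, 16))
		return n;
	for (int i = 0; i < n->nchildren; i++) {
		struct node *found = find_by_guid(n->children[i], guid);
		if (found)
			return found;
	}
	return NULL;
}
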
EXPORT_SYMBOL(drm_dp_send_query_stream_enc_status);
-static int drm_dp_create_payload_step1(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_atomic_payload *payload)
+static int drm_dp_create_payload_at_dfp(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_atomic_payload *payload)
{
return drm_dp_dpcd_write_payload(mgr, payload->vcpi, payload->vc_start_slot,
payload->time_slots);
}
-static int drm_dp_create_payload_step2(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_atomic_payload *payload)
+static int drm_dp_create_payload_to_remote(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_atomic_payload *payload)
{
int ret;
struct drm_dp_mst_port *port = drm_dp_mst_topology_get_port_validated(mgr, payload->port);
return ret;
}
-static int drm_dp_destroy_payload_step1(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_topology_state *mst_state,
- struct drm_dp_mst_atomic_payload *payload)
+static void drm_dp_destroy_payload_at_remote_and_dfp(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_topology_state *mst_state,
+ struct drm_dp_mst_atomic_payload *payload)
{
drm_dbg_kms(mgr->dev, "\n");
/* it's okay for these to fail */
- drm_dp_payload_send_msg(mgr, payload->port, payload->vcpi, 0);
- drm_dp_dpcd_write_payload(mgr, payload->vcpi, payload->vc_start_slot, 0);
+ if (payload->payload_allocation_status == DRM_DP_MST_PAYLOAD_ALLOCATION_REMOTE) {
+ drm_dp_payload_send_msg(mgr, payload->port, payload->vcpi, 0);
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_DFP;
+ }
- return 0;
+ if (payload->payload_allocation_status == DRM_DP_MST_PAYLOAD_ALLOCATION_DFP)
+ drm_dp_dpcd_write_payload(mgr, payload->vcpi, payload->vc_start_slot, 0);
}
/**
* @payload: The payload to write
*
* Determines the starting time slot for the given payload, and programs the VCPI for this payload
- * into hardware. After calling this, the driver should generate ACT and payload packets.
+ * into the DPCD of the DPRX. After calling this, the driver should generate ACT and payload packets.
*
- * Returns: 0 on success, error code on failure. In the event that this fails,
- * @payload.vc_start_slot will also be set to -1.
+ * Returns: 0 on success, error code on failure.
*/
int drm_dp_add_payload_part1(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_topology_state *mst_state,
struct drm_dp_mst_port *port;
int ret;
+ /* Update mst mgr info */
+ if (mgr->payload_count == 0)
+ mgr->next_start_slot = mst_state->start_slot;
+
+ payload->vc_start_slot = mgr->next_start_slot;
+
+ mgr->payload_count++;
+ mgr->next_start_slot += payload->time_slots;
+
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_LOCAL;
+
+ /* Allocate payload to immediate downstream facing port */
port = drm_dp_mst_topology_get_port_validated(mgr, payload->port);
if (!port) {
drm_dbg_kms(mgr->dev,
- "VCPI %d for port %p not in topology, not creating a payload\n",
+ "VCPI %d for port %p not in topology, not creating a payload to remote\n",
payload->vcpi, payload->port);
- payload->vc_start_slot = -1;
- return 0;
+ return -EIO;
}
- if (mgr->payload_count == 0)
- mgr->next_start_slot = mst_state->start_slot;
-
- payload->vc_start_slot = mgr->next_start_slot;
-
- ret = drm_dp_create_payload_step1(mgr, payload);
- drm_dp_mst_topology_put_port(port);
+ ret = drm_dp_create_payload_at_dfp(mgr, payload);
if (ret < 0) {
- drm_warn(mgr->dev, "Failed to create MST payload for port %p: %d\n",
- payload->port, ret);
- payload->vc_start_slot = -1;
- return ret;
+ drm_dbg_kms(mgr->dev, "Failed to create MST payload for port %p: %d\n",
+ payload->port, ret);
+ goto put_port;
}
- mgr->payload_count++;
- mgr->next_start_slot += payload->time_slots;
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_DFP;
- return 0;
+put_port:
+ drm_dp_mst_topology_put_port(port);
+
+ return ret;
}
EXPORT_SYMBOL(drm_dp_add_payload_part1);
/**
- * drm_dp_remove_payload() - Remove an MST payload
+ * drm_dp_remove_payload_part1() - Remove an MST payload along the virtual channel
* @mgr: Manager to use.
* @mst_state: The MST atomic state
- * @old_payload: The payload with its old state
- * @new_payload: The payload to write
+ * @payload: The payload to remove
*
- * Removes a payload from an MST topology if it was successfully assigned a start slot. Also updates
- * the starting time slots of all other payloads which would have been shifted towards the start of
- * the VC table as a result. After calling this, the driver should generate ACT and payload packets.
+ * Removes a payload along the virtual channel if it was successfully allocated.
+ * After calling this, the driver should set the HW to generate ACT and then switch to the new
+ * payload allocation state.
*/
-void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_mst_topology_state *mst_state,
- const struct drm_dp_mst_atomic_payload *old_payload,
- struct drm_dp_mst_atomic_payload *new_payload)
+void drm_dp_remove_payload_part1(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_topology_state *mst_state,
+ struct drm_dp_mst_atomic_payload *payload)
{
- struct drm_dp_mst_atomic_payload *pos;
+ /* Remove remote payload allocation */
bool send_remove = false;
- /* We failed to make the payload, so nothing to do */
- if (new_payload->vc_start_slot == -1)
- return;
-
mutex_lock(&mgr->lock);
- send_remove = drm_dp_mst_port_downstream_of_branch(new_payload->port, mgr->mst_primary);
+ send_remove = drm_dp_mst_port_downstream_of_branch(payload->port, mgr->mst_primary);
mutex_unlock(&mgr->lock);
if (send_remove)
- drm_dp_destroy_payload_step1(mgr, mst_state, new_payload);
+ drm_dp_destroy_payload_at_remote_and_dfp(mgr, mst_state, payload);
else
drm_dbg_kms(mgr->dev, "Payload for VCPI %d not in topology, not sending remove\n",
- new_payload->vcpi);
+ payload->vcpi);
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_LOCAL;
+}
+EXPORT_SYMBOL(drm_dp_remove_payload_part1);
+
+/**
+ * drm_dp_remove_payload_part2() - Remove an MST payload locally
+ * @mgr: Manager to use.
+ * @mst_state: The MST atomic state
+ * @old_payload: The payload with its old state
+ * @new_payload: The payload with its latest state
+ *
+ * Updates the starting time slots of all other payloads which would have been shifted towards
+ * the start of the payload ID table as a result of removing a payload. The driver should call this
+ * function whenever it removes a payload in its HW. It is independent of the result of payload
+ * allocation/deallocation at branch devices along the virtual channel.
+ */
+void drm_dp_remove_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_topology_state *mst_state,
+ const struct drm_dp_mst_atomic_payload *old_payload,
+ struct drm_dp_mst_atomic_payload *new_payload)
+{
+ struct drm_dp_mst_atomic_payload *pos;
+
+ /* Remove local payload allocation */
list_for_each_entry(pos, &mst_state->payloads, next) {
if (pos != new_payload && pos->vc_start_slot > new_payload->vc_start_slot)
pos->vc_start_slot -= old_payload->time_slots;
if (new_payload->delete)
drm_dp_mst_put_port_malloc(new_payload->port);
-}
-EXPORT_SYMBOL(drm_dp_remove_payload);
+ new_payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_NONE;
+}
+EXPORT_SYMBOL(drm_dp_remove_payload_part2);
/**
* drm_dp_add_payload_part2() - Execute payload update part 2
* @mgr: Manager to use.
int ret = 0;
/* Skip failed payloads */
- if (payload->vc_start_slot == -1) {
- drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
+ if (payload->payload_allocation_status != DRM_DP_MST_PAYLOAD_ALLOCATION_DFP) {
+ drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
payload->port->connector->name);
return -EIO;
}
- ret = drm_dp_create_payload_step2(mgr, payload);
- if (ret < 0) {
- if (!payload->delete)
- drm_err(mgr->dev, "Step 2 of creating MST payload for %p failed: %d\n",
- payload->port, ret);
- else
- drm_dbg_kms(mgr->dev, "Step 2 of removing MST payload for %p failed: %d\n",
- payload->port, ret);
- }
+ /* Allocate payload to remote end */
+ ret = drm_dp_create_payload_to_remote(mgr, payload);
+ if (ret < 0)
+ drm_err(mgr->dev, "Step 2 of creating MST payload for %p failed: %d\n",
+ payload->port, ret);
+ else
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_REMOTE;
return ret;
}
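
Taken together, each payload now records how far its allocation got in payload_allocation_status rather than signalling failure through vc_start_slot == -1, and the four helpers walk it through a small state machine. A compact model of the intended sequencing (states mirror the DRM_DP_MST_PAYLOAD_ALLOCATION_* values; the transitions are read off the code above):

#include <stdio.h>

enum payload_alloc_status { ALLOC_NONE, ALLOC_LOCAL, ALLOC_DFP, ALLOC_REMOTE };

int main(void)
{
	enum payload_alloc_status s = ALLOC_NONE;

	/* Creation */
	s = ALLOC_LOCAL;	/* add_payload_part1(): time slots reserved in the payload table */
	s = ALLOC_DFP;		/* add_payload_part1(): payload written to the DPCD payload table */
	s = ALLOC_REMOTE;	/* add_payload_part2(): ALLOCATE_PAYLOAD sent down the topology */

	/* Removal reverses the steps */
	s = ALLOC_LOCAL;	/* remove_payload_part1(): remote and DPCD allocations torn down */
	s = ALLOC_NONE;		/* remove_payload_part2(): local start-slot bookkeeping dropped */

	printf("final state: %d\n", (int)s);
	return 0;
}
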
drm_dp_mst_get_port_malloc(port);
payload->port = port;
payload->vc_start_slot = -1;
+ payload->payload_allocation_status = DRM_DP_MST_PAYLOAD_ALLOCATION_NONE;
list_add(&payload->next, &topology_state->payloads);
}
payload->time_slots = req_slots;
}
/* Now that previous state is committed, it's safe to copy over the start slot
- * assignments
+ * and allocation status assignments
*/
list_for_each_entry(old_payload, &old_mst_state->payloads, next) {
if (old_payload->delete)
new_payload = drm_atomic_get_mst_payload_state(new_mst_state,
old_payload->port);
new_payload->vc_start_slot = old_payload->vc_start_slot;
+ new_payload->payload_allocation_status =
+ old_payload->payload_allocation_status;
}
}
}
struct drm_dp_mst_atomic_payload *payload;
int i, ret;
+ static const char *const status[] = {
+ "None",
+ "Local",
+ "DFP",
+ "Remote",
+ };
+
mutex_lock(&mgr->lock);
if (mgr->mst_primary)
drm_dp_mst_dump_mstb(m, mgr->mst_primary);
seq_printf(m, "payload_mask: %x, max_payloads: %d, start_slot: %u, pbn_div: %d\n",
state->payload_mask, mgr->max_payloads, state->start_slot, state->pbn_div);
- seq_printf(m, "\n| idx | port | vcpi | slots | pbn | dsc | sink name |\n");
+ seq_printf(m, "\n| idx | port | vcpi | slots | pbn | dsc | status | sink name |\n");
for (i = 0; i < mgr->max_payloads; i++) {
list_for_each_entry(payload, &state->payloads, next) {
char name[14];
continue;
fetch_monitor_name(mgr, payload->port, name, sizeof(name));
- seq_printf(m, " %5d %6d %6d %02d - %02d %5d %5s %19s\n",
+ seq_printf(m, " %5d %6d %6d %02d - %02d %5d %5s %8s %19s\n",
i,
payload->port->port_num,
payload->vcpi,
payload->vc_start_slot + payload->time_slots - 1,
payload->pbn,
payload->dsc_enabled ? "Y" : "N",
+ status[payload->payload_allocation_status],
(*name != 0) ? name : "Unknown");
}
}