#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
-
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
ivpu_hw_reg_db_set(vdev, cmdq->db_id);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_stop_job_timeout_detection(vdev);
+
job_put(job);
return 0;
}
ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
if (ret)
ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
+
+ if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
}
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
if (ret)
goto err_xa_erase;
+ ivpu_start_job_timeout_detection(vdev);
+
ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
job->engine_idx, cmdq->jobq->header.tail);
struct ivpu_device *vdev = (struct ivpu_device *)arg;
struct ivpu_ipc_consumer cons;
struct vpu_jsm_msg jsm_msg;
- bool jobs_submitted;
unsigned int timeout;
int ret;
ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
while (!kthread_should_stop()) {
- timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
- jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
- if (!ret) {
+ if (!ret)
ivpu_job_done_message(vdev, &jsm_msg);
- } else if (ret == -ETIMEDOUT) {
- if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
- ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
- }
- }
+
if (kthread_should_park()) {
ivpu_dbg(vdev, JOB, "Parked %s\n", __func__);
kthread_parkme();
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
+static unsigned long ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
}
}
+static void ivpu_job_timeout_work(struct work_struct *work)
+{
+ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
+ struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
+ ivpu_hw_diagnose_failure(vdev);
+
+ ivpu_pm_schedule_recovery(vdev);
+}
+
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
+{
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ /* No-op if already queued */
+ queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+}
+
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
+{
+ cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+}
+
int ivpu_pm_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
atomic_set(&pm->in_reset, 0);
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+ INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
if (ivpu_disable_recovery)
delay = -1;
void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
+ drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
cancel_work_sync(&vdev->pm->recovery_work);
}