Git Repo - J-linux.git/commitdiff
drm/xe: Move the coredump registration to the worker thread
author: John Harrison <[email protected]>
Thu, 28 Nov 2024 21:08:23 +0000 (13:08 -0800)
committer: Thomas Hellström <[email protected]>
Tue, 3 Dec 2024 09:33:13 +0000 (10:33 +0100)
Adding lockdep checking to the coredump code showed that there was an
existing violation. The dev_coredumpm_timeout() call is used to
register the dump with the base coredump subsystem. However, that
makes multiple memory allocations, only some of which use the GFP_
flags passed in. So that also needs to be deferred to the worker
function where it is safe to allocate with arbitrary flags.

In order to not add prototypes for the callback functions, moving the
_timeout call also means moving the worker thread function to later in
the file.

v2: Rebased after other changes to the worker function.

Fixes: e799485044cb ("drm/xe: Introduce the dev_coredump infrastructure.")
Cc: Thomas Hellström <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Jani Nikula <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Francois Dugast <[email protected]>
Cc: Rodrigo Vivi <[email protected]>
Cc: Lucas De Marchi <[email protected]>
Cc: "Thomas Hellström" <[email protected]>
Cc: Sumit Semwal <[email protected]>
Cc: "Christian König" <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: <[email protected]> # v6.8+
Signed-off-by: John Harrison <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit 90f51a7f4ec1004fc4ddfbc6d1f1068d85ef4771)
Signed-off-by: Thomas Hellström <[email protected]>
drivers/gpu/drm/xe/xe_devcoredump.c

index 0b0cd6aa1d9fb4663454c0d921096a021dbf1de6..f8947e7e917ec585c75944a642939db302d344ac 100644 (file)
@@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
        ss->vm = NULL;
 }
 
-static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
-{
-       struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
-       struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
-       struct xe_device *xe = coredump_to_xe(coredump);
-       unsigned int fw_ref;
-
-       xe_pm_runtime_get(xe);
-
-       /* keep going if fw fails as we still want to save the memory and SW data */
-       fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
-       if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
-               xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
-       xe_vm_snapshot_capture_delayed(ss->vm);
-       xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
-       xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
-
-       xe_pm_runtime_put(xe);
-
-       /* Calculate devcoredump size */
-       ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
-
-       ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
-       if (!ss->read.buffer)
-               return;
-
-       __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
-       xe_devcoredump_snapshot_free(ss);
-}
-
 static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
                                   size_t count, void *data, size_t datalen)
 {
@@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data)
                 "Xe device coredump has been deleted.\n");
 }
 
+static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
+{
+       struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
+       struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
+       struct xe_device *xe = coredump_to_xe(coredump);
+       unsigned int fw_ref;
+
+       /*
+        * NB: Despite passing a GFP_ flags parameter here, more allocations are done
+        * internally using GFP_KERNEL explicitly. Hence this call must be in the worker
+        * thread and not in the initial capture call.
+        */
+       dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+                             xe_devcoredump_read, xe_devcoredump_free,
+                             XE_COREDUMP_TIMEOUT_JIFFIES);
+
+       xe_pm_runtime_get(xe);
+
+       /* keep going if fw fails as we still want to save the memory and SW data */
+       fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+       if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+               xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+       xe_vm_snapshot_capture_delayed(ss->vm);
+       xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
+       xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
+
+       xe_pm_runtime_put(xe);
+
+       /* Calculate devcoredump size */
+       ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
+
+       ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
+       if (!ss->read.buffer)
+               return;
+
+       __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
+       xe_devcoredump_snapshot_free(ss);
+}
+
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
                                 struct xe_sched_job *job)
 {
@@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job)
        drm_info(&xe->drm, "Xe device coredump has been created\n");
        drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
                 xe->drm.primary->index);
-
-       dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
-                             xe_devcoredump_read, xe_devcoredump_free,
-                             XE_COREDUMP_TIMEOUT_JIFFIES);
 }
 
 static void xe_driver_devcoredump_fini(void *arg)
This page took 0.051786 seconds and 4 git commands to generate.