2 * Copyright 2021 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/devcoredump.h>
25 #include <generated/utsrelease.h>
27 #include "amdgpu_reset.h"
28 #include "aldebaran.h"
29 #include "sienna_cichlid.h"
30 #include "smu_v13_0_10.h"
/*
 * amdgpu_reset_init - run the reset initializer that matches the device's
 * MP1 IP version.
 *
 * @adev: device whose MP1_HWIP version selects the initializer.
 *
 * Visible dispatch on amdgpu_ip_version(adev, MP1_HWIP, 0):
 *   IP_VERSION(13, 0, 2), IP_VERSION(13, 0, 6) -> aldebaran_reset_init()
 *   IP_VERSION(11, 0, 7)                       -> sienna_cichlid_reset_init()
 *   IP_VERSION(13, 0, 10)                      -> smu_v13_0_10_reset_init()
 * Each initializer's result is stored in ret.
 *
 * NOTE(review): the embedded numbering skips lines 33-35, 40, 43 and 46-52,
 * so the declaration of ret, whatever terminates each case, any default
 * case and the return statement are not visible here - confirm against the
 * complete file.
 */
32 int amdgpu_reset_init(struct amdgpu_device *adev)
36 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
37 case IP_VERSION(13, 0, 2):
38 case IP_VERSION(13, 0, 6):
39 ret = aldebaran_reset_init(adev);
41 case IP_VERSION(11, 0, 7):
42 ret = sienna_cichlid_reset_init(adev);
44 case IP_VERSION(13, 0, 10):
45 ret = smu_v13_0_10_reset_init(adev);
/*
 * amdgpu_reset_fini - run the reset teardown routine that matches the
 * device's MP1 IP version.
 *
 * @adev: device whose MP1_HWIP version selects the teardown routine.
 *
 * Uses the same IP versions as the switch in amdgpu_reset_init():
 *   IP_VERSION(13, 0, 2), IP_VERSION(13, 0, 6) -> aldebaran_reset_fini()
 *   IP_VERSION(11, 0, 7)                       -> sienna_cichlid_reset_fini()
 *   IP_VERSION(13, 0, 10)                      -> smu_v13_0_10_reset_fini()
 * Each routine's result is stored in ret.
 *
 * NOTE(review): the embedded numbering skips lines 55-57, 62, 65 and 68
 * onwards, so the declaration of ret, the case terminators, any default
 * case and the return statement are not visible here - confirm against the
 * complete file.
 */
54 int amdgpu_reset_fini(struct amdgpu_device *adev)
58 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
59 case IP_VERSION(13, 0, 2):
60 case IP_VERSION(13, 0, 6):
61 ret = aldebaran_reset_fini(adev);
63 case IP_VERSION(11, 0, 7):
64 ret = sienna_cichlid_reset_fini(adev);
66 case IP_VERSION(13, 0, 10):
67 ret = smu_v13_0_10_reset_fini(adev);
/*
 * amdgpu_reset_prepare_hwcontext - look up the reset handler for this reset
 * request and invoke its prepare_hwcontext() callback.
 *
 * @adev: device being reset; adev->reset_cntl supplies the handler lookup.
 * @reset_context: reset request forwarded to get_reset_handler().
 *
 * Returns the value produced by the handler's prepare_hwcontext() callback.
 *
 * NOTE(review): lines 84-86 are missing from this view. reset_handler stays
 * NULL when reset_cntl or get_reset_handler is unset, so a NULL check
 * presumably sits between the lookup and the dereference - confirm.
 */
76 int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
77 struct amdgpu_reset_context *reset_context)
79 struct amdgpu_reset_handler *reset_handler = NULL;
/* Only query for a handler when both the controller and its lookup hook exist. */
81 if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
82 reset_handler = adev->reset_cntl->get_reset_handler(
83 adev->reset_cntl, reset_context);
87 return reset_handler->prepare_hwcontext(adev->reset_cntl,
/*
 * amdgpu_reset_perform_reset - look up the reset handler, run its
 * perform_reset() callback, then its restore_hwcontext() callback.
 *
 * @adev: device being reset; adev->reset_cntl supplies the handler lookup.
 * @reset_context: reset request forwarded to the handler callbacks.
 *
 * The final visible statement returns the result of restore_hwcontext().
 *
 * NOTE(review): lines 93-94, 96-97, 100-102 and 104-106 are missing from this
 * view. The guard in front of get_reset_handler(), the NULL check on
 * reset_handler and the handling of ret after perform_reset() are therefore
 * not visible. amdgpu_reset_prepare_hwcontext() tests both reset_cntl and
 * get_reset_handler before the lookup - verify this function does the same.
 */
91 int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
92 struct amdgpu_reset_context *reset_context)
95 struct amdgpu_reset_handler *reset_handler = NULL;
98 reset_handler = adev->reset_cntl->get_reset_handler(
99 adev->reset_cntl, reset_context);
103 ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
107 return reset_handler->restore_hwcontext(adev->reset_cntl,
/*
 * amdgpu_reset_destroy_reset_domain - tear down a reset domain given the
 * kref embedded in it.
 *
 * @ref: kref inside the struct amdgpu_reset_domain to destroy; the owning
 *       structure is recovered with container_of().
 *
 * Destroys the domain's workqueue when one exists and then releases the
 * domain memory with kvfree().
 *
 * NOTE(review): the member name passed to container_of() (line 116) is not
 * visible; presumably this is the release function for the domain's
 * refcount - confirm against the header.
 */
112 void amdgpu_reset_destroy_reset_domain(struct kref *ref)
114 struct amdgpu_reset_domain *reset_domain = container_of(ref,
115 struct amdgpu_reset_domain,
/* wq may be NULL, e.g. when called from the workqueue-creation failure path. */
117 if (reset_domain->wq)
118 destroy_workqueue(reset_domain->wq);
120 kvfree(reset_domain);
/*
 * amdgpu_reset_create_reset_domain - allocate and initialise a reset domain.
 *
 * @type: stored in reset_domain->type.
 *
 * Visible steps: zero-allocate the domain with kvzalloc(), record the type,
 * initialise the refcount, create a single-threaded workqueue named wq_name,
 * clear the in_gpu_reset and reset_res atomics and initialise the rwsem.
 *
 * NOTE(review): the rest of the parameter list (line 124, which presumably
 * declares wq_name), the allocation-failure test (line 129) and every return
 * statement are missing from this view - confirm the return values against
 * the complete file.
 */
123 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
126 struct amdgpu_reset_domain *reset_domain;
128 reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
130 DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
134 reset_domain->type = type;
135 kref_init(&reset_domain->refcount);
137 reset_domain->wq = create_singlethread_workqueue(wq_name);
138 if (!reset_domain->wq) {
139 DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
/* The refcount is already initialised, so drop the domain through the put helper. */
140 amdgpu_reset_put_reset_domain(reset_domain);
145 atomic_set(&reset_domain->in_gpu_reset, 0);
146 atomic_set(&reset_domain->reset_res, 0);
147 init_rwsem(&reset_domain->sem);
/*
 * amdgpu_device_lock_reset_domain - mark the domain as being in GPU reset and
 * take its rwsem for writing.
 *
 * @reset_domain: domain to lock.
 *
 * in_gpu_reset is set to 1 before down_write() is called, so the flag is
 * already raised while this caller may still be waiting for the semaphore.
 */
152 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
154 atomic_set(&reset_domain->in_gpu_reset, 1);
155 down_write(&reset_domain->sem);
/*
 * amdgpu_device_unlock_reset_domain - clear the in-reset flag and release the
 * domain's rwsem write lock.
 *
 * @reset_domain: domain previously locked with
 *                amdgpu_device_lock_reset_domain().
 *
 * in_gpu_reset is set back to 0 before up_write() releases the semaphore.
 */
159 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
161 atomic_set(&reset_domain->in_gpu_reset, 0);
162 up_write(&reset_domain->sem);
/*
 * Variant of amdgpu_coredump() compiled when CONFIG_DEV_COREDUMP is not
 * defined. It has the same signature as the devcoredump-backed definition
 * further down in this file.
 *
 * NOTE(review): the body (lines 168-169) and the preprocessor directive that
 * ends this branch are missing from this view; presumably this is an empty
 * stub - confirm.
 */
165 #ifndef CONFIG_DEV_COREDUMP
166 void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
167 struct amdgpu_reset_context *reset_context)
/*
 * amdgpu_devcoredump_read - format the text of an AMDGPU device coredump.
 * It is passed to dev_coredumpm() as the read callback by amdgpu_coredump().
 *
 * @buffer, @offset, @count: output window requested by the reader.
 * @data: the struct amdgpu_coredump_info captured at reset time.
 * @datalen: not referenced in the visible lines.
 *
 * Output, in order: a banner, the coredump format version, kernel release,
 * module name and reset timestamp; the resetting process name and PID when
 * a PID was recorded; a VRAM-lost notice when flagged; and the register
 * offset/value pairs when adev->reset_info.num_regs is non-zero.
 *
 * Returns count minus the space left in the print iterator.
 *
 * NOTE(review): the return-type line (171), the declaration of i and the
 * iterator setup (lines 178-184) are missing from this view; presumably
 * iter is initialised from buffer, offset and count - confirm.
 */
172 amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
173 void *data, size_t datalen)
175 struct drm_printer p;
176 struct amdgpu_coredump_info *coredump = data;
177 struct drm_print_iterator iter;
185 p = drm_coredump_printer(&iter);
187 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
188 drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
189 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
190 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
191 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
192 coredump->reset_time.tv_nsec);
/* Task details are printed only when a non-zero PID was captured. */
194 if (coredump->reset_task_info.pid)
195 drm_printf(&p, "process_name: %s PID: %d\n",
196 coredump->reset_task_info.process_name,
197 coredump->reset_task_info.pid);
199 if (coredump->reset_vram_lost)
200 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
/* Register values are read from adev at read time, not from a copy in coredump. */
201 if (coredump->adev->reset_info.num_regs) {
202 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
204 for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
205 drm_printf(&p, "0x%08x: 0x%08x\n",
206 coredump->adev->reset_info.reset_dump_reg_list[i],
207 coredump->adev->reset_info.reset_dump_reg_value[i]);
210 return count - iter.remain;
/*
 * amdgpu_devcoredump_free - free callback passed to dev_coredumpm() by
 * amdgpu_coredump().
 *
 * @data: the same pointer amdgpu_coredump() passes to dev_coredumpm() as its
 *        data argument.
 *
 * NOTE(review): the body is missing from this view; presumably it frees the
 * struct amdgpu_coredump_info allocated in amdgpu_coredump() - confirm.
 */
213 static void amdgpu_devcoredump_free(void *data)
/*
 * amdgpu_coredump - capture reset information and register it as a device
 * coredump.
 *
 * @adev: device that was reset; stored in the coredump record by pointer.
 * @vram_lost: recorded as reset_vram_lost.
 * @reset_context: when it carries a job that has a vm, that vm's task_info
 *                 is copied into the record.
 *
 * The record is allocated with kzalloc(GFP_NOWAIT), stamped with
 * ktime_get_ts64() and handed to dev_coredumpm() together with
 * amdgpu_devcoredump_read and amdgpu_devcoredump_free as callbacks.
 *
 * NOTE(review): the allocation-failure test and its exit path (lines
 * 225-229) are missing from this view; only the error message is visible.
 */
218 void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
219 struct amdgpu_reset_context *reset_context)
221 struct amdgpu_coredump_info *coredump;
222 struct drm_device *dev = adev_to_drm(adev);
224 coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
227 DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
231 coredump->reset_vram_lost = vram_lost;
/* Task info is available only when the reset was triggered by a job with a vm. */
233 if (reset_context->job && reset_context->job->vm)
234 coredump->reset_task_info = reset_context->job->vm->task_info;
236 coredump->adev = adev;
238 ktime_get_ts64(&coredump->reset_time);
240 dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
241 amdgpu_devcoredump_read, amdgpu_devcoredump_free);