]>
Commit | Line | Data |
---|---|---|
fbeb661b YS |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include <linux/types.h> | |
25 | #include <linux/kernel.h> | |
26 | #include <linux/log2.h> | |
27 | #include <linux/sched.h> | |
28 | #include <linux/slab.h> | |
29 | #include <linux/mutex.h> | |
30 | #include <linux/device.h> | |
31 | ||
32 | #include "kfd_pm4_headers.h" | |
33 | #include "kfd_pm4_headers_diq.h" | |
34 | #include "kfd_kernel_queue.h" | |
35 | #include "kfd_priv.h" | |
36 | #include "kfd_pm4_opcodes.h" | |
37 | #include "cik_regs.h" | |
38 | #include "kfd_dbgmgr.h" | |
39 | #include "kfd_dbgdev.h" | |
40 | #include "kfd_device_queue_manager.h" | |
fbeb661b YS |
41 | |
42 | static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) | |
43 | { | |
fbeb661b YS |
44 | dev->kfd2kgd->address_watch_disable(dev->kgd); |
45 | } | |
46 | ||
788bf83d | 47 | static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, |
c7b6bac9 | 48 | u32 pasid, uint64_t vmid0_address, |
788bf83d YS |
49 | uint32_t *packet_buff, size_t size_in_bytes) |
50 | { | |
51 | struct pm4__release_mem *rm_packet; | |
52 | struct pm4__indirect_buffer_pasid *ib_packet; | |
53 | struct kfd_mem_obj *mem_obj; | |
54 | size_t pq_packets_size_in_bytes; | |
55 | union ULARGE_INTEGER *largep; | |
56 | union ULARGE_INTEGER addr; | |
57 | struct kernel_queue *kq; | |
58 | uint64_t *rm_state; | |
59 | unsigned int *ib_packet_buff; | |
60 | int status; | |
61 | ||
32fa8219 FK |
62 | if (WARN_ON(!size_in_bytes)) |
63 | return -EINVAL; | |
788bf83d YS |
64 | |
65 | kq = dbgdev->kq; | |
66 | ||
67 | pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + | |
68 | sizeof(struct pm4__indirect_buffer_pasid); | |
69 | ||
70 | /* | |
71 | * We acquire a buffer from DIQ | |
72 | * The receive packet buff will be sitting on the Indirect Buffer | |
73 | * and in the PQ we put the IB packet + sync packet(s). | |
74 | */ | |
a5a4d68c | 75 | status = kq_acquire_packet_buffer(kq, |
788bf83d YS |
76 | pq_packets_size_in_bytes / sizeof(uint32_t), |
77 | &ib_packet_buff); | |
4eacc26b | 78 | if (status) { |
a5a4d68c | 79 | pr_err("kq_acquire_packet_buffer failed\n"); |
788bf83d YS |
80 | return status; |
81 | } | |
82 | ||
83 | memset(ib_packet_buff, 0, pq_packets_size_in_bytes); | |
84 | ||
85 | ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); | |
86 | ||
87 | ib_packet->header.count = 3; | |
88 | ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; | |
89 | ib_packet->header.type = PM4_TYPE_3; | |
90 | ||
91 | largep = (union ULARGE_INTEGER *) &vmid0_address; | |
92 | ||
93 | ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; | |
94 | ib_packet->bitfields3.ib_base_hi = largep->u.high_part; | |
95 | ||
96 | ib_packet->control = (1 << 23) | (1 << 31) | | |
6d566930 | 97 | ((size_in_bytes / 4) & 0xfffff); |
788bf83d YS |
98 | |
99 | ib_packet->bitfields5.pasid = pasid; | |
100 | ||
101 | /* | |
102 | * for now we use release mem for GPU-CPU synchronization | |
103 | * Consider WaitRegMem + WriteData as a better alternative | |
104 | * we get a GART allocations ( gpu/cpu mapping), | |
105 | * for the sync variable, and wait until: | |
106 | * (a) Sync with HW | |
107 | * (b) Sync var is written by CP to mem. | |
108 | */ | |
109 | rm_packet = (struct pm4__release_mem *) (ib_packet_buff + | |
110 | (sizeof(struct pm4__indirect_buffer_pasid) / | |
111 | sizeof(unsigned int))); | |
112 | ||
113 | status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), | |
114 | &mem_obj); | |
115 | ||
4eacc26b | 116 | if (status) { |
79775b62 | 117 | pr_err("Failed to allocate GART memory\n"); |
a5a4d68c | 118 | kq_rollback_packet(kq); |
788bf83d YS |
119 | return status; |
120 | } | |
121 | ||
122 | rm_state = (uint64_t *) mem_obj->cpu_ptr; | |
123 | ||
124 | *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; | |
125 | ||
126 | rm_packet->header.opcode = IT_RELEASE_MEM; | |
127 | rm_packet->header.type = PM4_TYPE_3; | |
6d566930 | 128 | rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; |
788bf83d YS |
129 | |
130 | rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; | |
131 | rm_packet->bitfields2.event_index = | |
132 | event_index___release_mem__end_of_pipe; | |
133 | ||
134 | rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; | |
135 | rm_packet->bitfields2.atc = 0; | |
136 | rm_packet->bitfields2.tc_wb_action_ena = 1; | |
137 | ||
138 | addr.quad_part = mem_obj->gpu_addr; | |
139 | ||
140 | rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; | |
141 | rm_packet->address_hi = addr.u.high_part; | |
142 | ||
143 | rm_packet->bitfields3.data_sel = | |
144 | data_sel___release_mem__send_64_bit_data; | |
145 | ||
146 | rm_packet->bitfields3.int_sel = | |
147 | int_sel___release_mem__send_data_after_write_confirm; | |
148 | ||
149 | rm_packet->bitfields3.dst_sel = | |
150 | dst_sel___release_mem__memory_controller; | |
151 | ||
152 | rm_packet->data_lo = QUEUESTATE__ACTIVE; | |
153 | ||
a5a4d68c | 154 | kq_submit_packet(kq); |
788bf83d YS |
155 | |
156 | /* Wait till CP writes sync code: */ | |
157 | status = amdkfd_fence_wait_timeout( | |
e92049ae | 158 | rm_state, |
788bf83d YS |
159 | QUEUESTATE__ACTIVE, 1500); |
160 | ||
161 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
162 | ||
163 | return status; | |
164 | } | |
165 | ||
fbeb661b YS |
166 | static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) |
167 | { | |
fbeb661b YS |
168 | /* |
169 | * no action is needed in this case, | |
170 | * just make sure diq will not be used | |
171 | */ | |
172 | ||
173 | dbgdev->kq = NULL; | |
174 | ||
175 | return 0; | |
176 | } | |
177 | ||
178 | static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) | |
179 | { | |
180 | struct queue_properties properties; | |
181 | unsigned int qid; | |
182 | struct kernel_queue *kq = NULL; | |
183 | int status; | |
184 | ||
e6f791b1 YZ |
185 | properties.type = KFD_QUEUE_TYPE_DIQ; |
186 | ||
fbeb661b | 187 | status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, |
e47a8b52 | 188 | &properties, &qid, NULL); |
fbeb661b YS |
189 | |
190 | if (status) { | |
79775b62 | 191 | pr_err("Failed to create DIQ\n"); |
fbeb661b YS |
192 | return status; |
193 | } | |
194 | ||
195 | pr_debug("DIQ Created with queue id: %d\n", qid); | |
196 | ||
197 | kq = pqm_get_kernel_queue(dbgdev->pqm, qid); | |
198 | ||
4eacc26b | 199 | if (!kq) { |
79775b62 | 200 | pr_err("Error getting DIQ\n"); |
fbeb661b YS |
201 | pqm_destroy_queue(dbgdev->pqm, qid); |
202 | return -EFAULT; | |
203 | } | |
204 | ||
205 | dbgdev->kq = kq; | |
206 | ||
207 | return status; | |
208 | } | |
209 | ||
210 | static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) | |
211 | { | |
fbeb661b YS |
212 | /* disable watch address */ |
213 | dbgdev_address_watch_disable_nodiq(dbgdev->dev); | |
214 | return 0; | |
215 | } | |
216 | ||
217 | static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) | |
218 | { | |
219 | /* todo - disable address watch */ | |
220 | int status; | |
221 | ||
fbeb661b YS |
222 | status = pqm_destroy_queue(dbgdev->pqm, |
223 | dbgdev->kq->queue->properties.queue_id); | |
224 | dbgdev->kq = NULL; | |
225 | ||
226 | return status; | |
227 | } | |
228 | ||
e2e9afc4 YS |
229 | static void dbgdev_address_watch_set_registers( |
230 | const struct dbg_address_watch_info *adw_info, | |
231 | union TCP_WATCH_ADDR_H_BITS *addrHi, | |
232 | union TCP_WATCH_ADDR_L_BITS *addrLo, | |
233 | union TCP_WATCH_CNTL_BITS *cntl, | |
234 | unsigned int index, unsigned int vmid) | |
235 | { | |
236 | union ULARGE_INTEGER addr; | |
237 | ||
e2e9afc4 YS |
238 | addr.quad_part = 0; |
239 | addrHi->u32All = 0; | |
240 | addrLo->u32All = 0; | |
241 | cntl->u32All = 0; | |
242 | ||
4eacc26b | 243 | if (adw_info->watch_mask) |
e2e9afc4 YS |
244 | cntl->bitfields.mask = |
245 | (uint32_t) (adw_info->watch_mask[index] & | |
246 | ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); | |
247 | else | |
248 | cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; | |
249 | ||
250 | addr.quad_part = (unsigned long long) adw_info->watch_address[index]; | |
251 | ||
252 | addrHi->bitfields.addr = addr.u.high_part & | |
253 | ADDRESS_WATCH_REG_ADDHIGH_MASK; | |
254 | addrLo->bitfields.addr = | |
255 | (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); | |
256 | ||
257 | cntl->bitfields.mode = adw_info->watch_mode[index]; | |
258 | cntl->bitfields.vmid = (uint32_t) vmid; | |
259 | /* for now assume it is an ATC address */ | |
260 | cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; | |
261 | ||
262 | pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); | |
263 | pr_debug("\t\t%20s %08x\n", "set reg add high :", | |
264 | addrHi->bitfields.addr); | |
265 | pr_debug("\t\t%20s %08x\n", "set reg add low :", | |
266 | addrLo->bitfields.addr); | |
267 | } | |
268 | ||
269 | static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, | |
79775b62 | 270 | struct dbg_address_watch_info *adw_info) |
e2e9afc4 YS |
271 | { |
272 | union TCP_WATCH_ADDR_H_BITS addrHi; | |
273 | union TCP_WATCH_ADDR_L_BITS addrLo; | |
274 | union TCP_WATCH_CNTL_BITS cntl; | |
275 | struct kfd_process_device *pdd; | |
276 | unsigned int i; | |
277 | ||
e2e9afc4 YS |
278 | /* taking the vmid for that process on the safe way using pdd */ |
279 | pdd = kfd_get_process_device_data(dbgdev->dev, | |
280 | adw_info->process); | |
281 | if (!pdd) { | |
79775b62 | 282 | pr_err("Failed to get pdd for wave control no DIQ\n"); |
e2e9afc4 YS |
283 | return -EFAULT; |
284 | } | |
285 | ||
286 | addrHi.u32All = 0; | |
287 | addrLo.u32All = 0; | |
288 | cntl.u32All = 0; | |
289 | ||
290 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || | |
291 | (adw_info->num_watch_points == 0)) { | |
79775b62 | 292 | pr_err("num_watch_points is invalid\n"); |
e2e9afc4 YS |
293 | return -EINVAL; |
294 | } | |
295 | ||
4eacc26b | 296 | if (!adw_info->watch_mode || !adw_info->watch_address) { |
79775b62 | 297 | pr_err("adw_info fields are not valid\n"); |
e2e9afc4 YS |
298 | return -EINVAL; |
299 | } | |
300 | ||
8eabaf54 | 301 | for (i = 0; i < adw_info->num_watch_points; i++) { |
e2e9afc4 YS |
302 | dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, |
303 | &cntl, i, pdd->qpd.vmid); | |
304 | ||
305 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
306 | pr_debug("\t\t%20s %08x\n", "register index :", i); | |
307 | pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); | |
308 | pr_debug("\t\t%20s %08x\n", "Address Low is :", | |
309 | addrLo.bitfields.addr); | |
310 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
311 | addrHi.bitfields.addr); | |
312 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
313 | addrHi.bitfields.addr); | |
314 | pr_debug("\t\t%20s %08x\n", "Control Mask is :", | |
315 | cntl.bitfields.mask); | |
316 | pr_debug("\t\t%20s %08x\n", "Control Mode is :", | |
317 | cntl.bitfields.mode); | |
318 | pr_debug("\t\t%20s %08x\n", "Control Vmid is :", | |
319 | cntl.bitfields.vmid); | |
320 | pr_debug("\t\t%20s %08x\n", "Control atc is :", | |
321 | cntl.bitfields.atc); | |
322 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
323 | ||
324 | pdd->dev->kfd2kgd->address_watch_execute( | |
325 | dbgdev->dev->kgd, | |
326 | i, | |
327 | cntl.u32All, | |
328 | addrHi.u32All, | |
329 | addrLo.u32All); | |
330 | } | |
331 | ||
332 | return 0; | |
333 | } | |
334 | ||
335 | static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, | |
79775b62 | 336 | struct dbg_address_watch_info *adw_info) |
e2e9afc4 YS |
337 | { |
338 | struct pm4__set_config_reg *packets_vec; | |
339 | union TCP_WATCH_ADDR_H_BITS addrHi; | |
340 | union TCP_WATCH_ADDR_L_BITS addrLo; | |
341 | union TCP_WATCH_CNTL_BITS cntl; | |
342 | struct kfd_mem_obj *mem_obj; | |
343 | unsigned int aw_reg_add_dword; | |
344 | uint32_t *packet_buff_uint; | |
345 | unsigned int i; | |
346 | int status; | |
347 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; | |
348 | /* we do not control the vmid in DIQ mode, just a place holder */ | |
349 | unsigned int vmid = 0; | |
350 | ||
e2e9afc4 YS |
351 | addrHi.u32All = 0; |
352 | addrLo.u32All = 0; | |
353 | cntl.u32All = 0; | |
354 | ||
355 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || | |
356 | (adw_info->num_watch_points == 0)) { | |
79775b62 | 357 | pr_err("num_watch_points is invalid\n"); |
e2e9afc4 YS |
358 | return -EINVAL; |
359 | } | |
360 | ||
4eacc26b | 361 | if (!adw_info->watch_mode || !adw_info->watch_address) { |
79775b62 | 362 | pr_err("adw_info fields are not valid\n"); |
e2e9afc4 YS |
363 | return -EINVAL; |
364 | } | |
365 | ||
366 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); | |
367 | ||
4eacc26b | 368 | if (status) { |
79775b62 | 369 | pr_err("Failed to allocate GART memory\n"); |
e2e9afc4 YS |
370 | return status; |
371 | } | |
372 | ||
373 | packet_buff_uint = mem_obj->cpu_ptr; | |
374 | ||
375 | memset(packet_buff_uint, 0, ib_size); | |
376 | ||
377 | packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); | |
378 | ||
379 | packets_vec[0].header.count = 1; | |
380 | packets_vec[0].header.opcode = IT_SET_CONFIG_REG; | |
381 | packets_vec[0].header.type = PM4_TYPE_3; | |
382 | packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; | |
383 | packets_vec[0].bitfields2.insert_vmid = 1; | |
384 | packets_vec[1].ordinal1 = packets_vec[0].ordinal1; | |
385 | packets_vec[1].bitfields2.insert_vmid = 0; | |
386 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; | |
387 | packets_vec[2].bitfields2.insert_vmid = 0; | |
388 | packets_vec[3].ordinal1 = packets_vec[0].ordinal1; | |
389 | packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; | |
390 | packets_vec[3].bitfields2.insert_vmid = 1; | |
391 | ||
392 | for (i = 0; i < adw_info->num_watch_points; i++) { | |
393 | dbgdev_address_watch_set_registers(adw_info, | |
394 | &addrHi, | |
395 | &addrLo, | |
396 | &cntl, | |
397 | i, | |
398 | vmid); | |
399 | ||
400 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
401 | pr_debug("\t\t%20s %08x\n", "register index :", i); | |
402 | pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); | |
403 | pr_debug("\t\t%20s %p\n", "Add ptr is :", | |
404 | adw_info->watch_address); | |
405 | pr_debug("\t\t%20s %08llx\n", "Add is :", | |
406 | adw_info->watch_address[i]); | |
407 | pr_debug("\t\t%20s %08x\n", "Address Low is :", | |
408 | addrLo.bitfields.addr); | |
409 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
410 | addrHi.bitfields.addr); | |
411 | pr_debug("\t\t%20s %08x\n", "Control Mask is :", | |
412 | cntl.bitfields.mask); | |
413 | pr_debug("\t\t%20s %08x\n", "Control Mode is :", | |
414 | cntl.bitfields.mode); | |
415 | pr_debug("\t\t%20s %08x\n", "Control Vmid is :", | |
416 | cntl.bitfields.vmid); | |
417 | pr_debug("\t\t%20s %08x\n", "Control atc is :", | |
418 | cntl.bitfields.atc); | |
419 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
420 | ||
421 | aw_reg_add_dword = | |
422 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
423 | dbgdev->dev->kgd, | |
424 | i, | |
425 | ADDRESS_WATCH_REG_CNTL); | |
426 | ||
e2e9afc4 | 427 | packets_vec[0].bitfields2.reg_offset = |
f4e04022 | 428 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
429 | |
430 | packets_vec[0].reg_data[0] = cntl.u32All; | |
431 | ||
432 | aw_reg_add_dword = | |
433 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
434 | dbgdev->dev->kgd, | |
435 | i, | |
436 | ADDRESS_WATCH_REG_ADDR_HI); | |
437 | ||
e2e9afc4 | 438 | packets_vec[1].bitfields2.reg_offset = |
f4e04022 | 439 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
440 | packets_vec[1].reg_data[0] = addrHi.u32All; |
441 | ||
442 | aw_reg_add_dword = | |
443 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
444 | dbgdev->dev->kgd, | |
445 | i, | |
446 | ADDRESS_WATCH_REG_ADDR_LO); | |
447 | ||
e2e9afc4 | 448 | packets_vec[2].bitfields2.reg_offset = |
f4e04022 | 449 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
450 | packets_vec[2].reg_data[0] = addrLo.u32All; |
451 | ||
452 | /* enable watch flag if address is not zero*/ | |
453 | if (adw_info->watch_address[i] > 0) | |
454 | cntl.bitfields.valid = 1; | |
455 | else | |
456 | cntl.bitfields.valid = 0; | |
457 | ||
458 | aw_reg_add_dword = | |
459 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
460 | dbgdev->dev->kgd, | |
461 | i, | |
462 | ADDRESS_WATCH_REG_CNTL); | |
463 | ||
e2e9afc4 | 464 | packets_vec[3].bitfields2.reg_offset = |
f4e04022 | 465 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
466 | packets_vec[3].reg_data[0] = cntl.u32All; |
467 | ||
468 | status = dbgdev_diq_submit_ib( | |
469 | dbgdev, | |
470 | adw_info->process->pasid, | |
471 | mem_obj->gpu_addr, | |
472 | packet_buff_uint, | |
473 | ib_size); | |
474 | ||
4eacc26b | 475 | if (status) { |
79775b62 | 476 | pr_err("Failed to submit IB to DIQ\n"); |
e2e9afc4 YS |
477 | break; |
478 | } | |
479 | } | |
480 | ||
481 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
482 | return status; | |
483 | } | |
484 | ||
788bf83d YS |
485 | static int dbgdev_wave_control_set_registers( |
486 | struct dbg_wave_control_info *wac_info, | |
487 | union SQ_CMD_BITS *in_reg_sq_cmd, | |
488 | union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) | |
489 | { | |
93fce954 | 490 | int status = 0; |
788bf83d YS |
491 | union SQ_CMD_BITS reg_sq_cmd; |
492 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
493 | struct HsaDbgWaveMsgAMDGen2 *pMsg; | |
494 | ||
788bf83d YS |
495 | reg_sq_cmd.u32All = 0; |
496 | reg_gfx_index.u32All = 0; | |
497 | pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; | |
498 | ||
499 | switch (wac_info->mode) { | |
500 | /* Send command to single wave */ | |
501 | case HSA_DBG_WAVEMODE_SINGLE: | |
502 | /* | |
503 | * Limit access to the process waves only, | |
504 | * by setting vmid check | |
505 | */ | |
506 | reg_sq_cmd.bits.check_vmid = 1; | |
507 | reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; | |
508 | reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; | |
509 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; | |
510 | ||
511 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; | |
512 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; | |
513 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; | |
514 | ||
515 | break; | |
516 | ||
517 | /* Send command to all waves with matching VMID */ | |
518 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: | |
519 | ||
520 | reg_gfx_index.bits.sh_broadcast_writes = 1; | |
521 | reg_gfx_index.bits.se_broadcast_writes = 1; | |
522 | reg_gfx_index.bits.instance_broadcast_writes = 1; | |
523 | ||
524 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; | |
525 | ||
526 | break; | |
527 | ||
528 | /* Send command to all CU waves with matching VMID */ | |
529 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: | |
530 | ||
531 | reg_sq_cmd.bits.check_vmid = 1; | |
532 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; | |
533 | ||
534 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; | |
535 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; | |
536 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; | |
537 | ||
538 | break; | |
539 | ||
540 | default: | |
541 | return -EINVAL; | |
542 | } | |
543 | ||
544 | switch (wac_info->operand) { | |
545 | case HSA_DBG_WAVEOP_HALT: | |
546 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; | |
547 | break; | |
548 | ||
549 | case HSA_DBG_WAVEOP_RESUME: | |
550 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; | |
551 | break; | |
552 | ||
553 | case HSA_DBG_WAVEOP_KILL: | |
554 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; | |
555 | break; | |
556 | ||
557 | case HSA_DBG_WAVEOP_DEBUG: | |
558 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; | |
559 | break; | |
560 | ||
561 | case HSA_DBG_WAVEOP_TRAP: | |
562 | if (wac_info->trapId < MAX_TRAPID) { | |
563 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; | |
564 | reg_sq_cmd.bits.trap_id = wac_info->trapId; | |
565 | } else { | |
566 | status = -EINVAL; | |
567 | } | |
568 | break; | |
569 | ||
570 | default: | |
571 | status = -EINVAL; | |
572 | break; | |
573 | } | |
574 | ||
575 | if (status == 0) { | |
576 | *in_reg_sq_cmd = reg_sq_cmd; | |
577 | *in_reg_gfx_index = reg_gfx_index; | |
578 | } | |
579 | ||
580 | return status; | |
581 | } | |
582 | ||
583 | static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, | |
584 | struct dbg_wave_control_info *wac_info) | |
585 | { | |
586 | ||
587 | int status; | |
588 | union SQ_CMD_BITS reg_sq_cmd; | |
589 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
590 | struct kfd_mem_obj *mem_obj; | |
591 | uint32_t *packet_buff_uint; | |
592 | struct pm4__set_config_reg *packets_vec; | |
593 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; | |
594 | ||
788bf83d YS |
595 | reg_sq_cmd.u32All = 0; |
596 | ||
597 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, | |
598 | ®_gfx_index); | |
599 | if (status) { | |
79775b62 | 600 | pr_err("Failed to set wave control registers\n"); |
788bf83d YS |
601 | return status; |
602 | } | |
603 | ||
8eabaf54 | 604 | /* we do not control the VMID in DIQ, so reset it to a known value */ |
788bf83d YS |
605 | reg_sq_cmd.bits.vm_id = 0; |
606 | ||
607 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
608 | ||
609 | pr_debug("\t\t mode is: %u\n", wac_info->mode); | |
610 | pr_debug("\t\t operand is: %u\n", wac_info->operand); | |
611 | pr_debug("\t\t trap id is: %u\n", wac_info->trapId); | |
612 | pr_debug("\t\t msg value is: %u\n", | |
613 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); | |
614 | pr_debug("\t\t vmid is: N/A\n"); | |
615 | ||
616 | pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); | |
617 | pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); | |
618 | pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); | |
619 | pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); | |
620 | pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); | |
621 | pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); | |
622 | pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); | |
623 | ||
624 | pr_debug("\t\t ibw is : %u\n", | |
625 | reg_gfx_index.bitfields.instance_broadcast_writes); | |
626 | pr_debug("\t\t ii is : %u\n", | |
627 | reg_gfx_index.bitfields.instance_index); | |
628 | pr_debug("\t\t sebw is : %u\n", | |
629 | reg_gfx_index.bitfields.se_broadcast_writes); | |
630 | pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); | |
631 | pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); | |
632 | pr_debug("\t\t sbw is : %u\n", | |
633 | reg_gfx_index.bitfields.sh_broadcast_writes); | |
634 | ||
635 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
636 | ||
637 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); | |
638 | ||
639 | if (status != 0) { | |
79775b62 | 640 | pr_err("Failed to allocate GART memory\n"); |
788bf83d YS |
641 | return status; |
642 | } | |
643 | ||
644 | packet_buff_uint = mem_obj->cpu_ptr; | |
645 | ||
646 | memset(packet_buff_uint, 0, ib_size); | |
647 | ||
648 | packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; | |
649 | packets_vec[0].header.count = 1; | |
650 | packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; | |
651 | packets_vec[0].header.type = PM4_TYPE_3; | |
652 | packets_vec[0].bitfields2.reg_offset = | |
6d566930 | 653 | GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; |
788bf83d YS |
654 | |
655 | packets_vec[0].bitfields2.insert_vmid = 0; | |
656 | packets_vec[0].reg_data[0] = reg_gfx_index.u32All; | |
657 | ||
658 | packets_vec[1].header.count = 1; | |
659 | packets_vec[1].header.opcode = IT_SET_CONFIG_REG; | |
660 | packets_vec[1].header.type = PM4_TYPE_3; | |
6d566930 | 661 | packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; |
788bf83d YS |
662 | |
663 | packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; | |
664 | packets_vec[1].bitfields2.insert_vmid = 1; | |
665 | packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; | |
666 | ||
667 | /* Restore the GRBM_GFX_INDEX register */ | |
668 | ||
669 | reg_gfx_index.u32All = 0; | |
670 | reg_gfx_index.bits.sh_broadcast_writes = 1; | |
671 | reg_gfx_index.bits.instance_broadcast_writes = 1; | |
672 | reg_gfx_index.bits.se_broadcast_writes = 1; | |
673 | ||
674 | ||
675 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; | |
676 | packets_vec[2].bitfields2.reg_offset = | |
6d566930 | 677 | GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; |
788bf83d YS |
678 | |
679 | packets_vec[2].bitfields2.insert_vmid = 0; | |
680 | packets_vec[2].reg_data[0] = reg_gfx_index.u32All; | |
681 | ||
682 | status = dbgdev_diq_submit_ib( | |
683 | dbgdev, | |
684 | wac_info->process->pasid, | |
685 | mem_obj->gpu_addr, | |
686 | packet_buff_uint, | |
687 | ib_size); | |
688 | ||
4eacc26b | 689 | if (status) |
79775b62 | 690 | pr_err("Failed to submit IB to DIQ\n"); |
788bf83d YS |
691 | |
692 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
693 | ||
694 | return status; | |
695 | } | |
696 | ||
697 | static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, | |
698 | struct dbg_wave_control_info *wac_info) | |
699 | { | |
700 | int status; | |
701 | union SQ_CMD_BITS reg_sq_cmd; | |
702 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
703 | struct kfd_process_device *pdd; | |
704 | ||
788bf83d YS |
705 | reg_sq_cmd.u32All = 0; |
706 | ||
707 | /* taking the VMID for that process on the safe way using PDD */ | |
708 | pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); | |
709 | ||
710 | if (!pdd) { | |
79775b62 | 711 | pr_err("Failed to get pdd for wave control no DIQ\n"); |
788bf83d YS |
712 | return -EFAULT; |
713 | } | |
714 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, | |
715 | ®_gfx_index); | |
716 | if (status) { | |
79775b62 | 717 | pr_err("Failed to set wave control registers\n"); |
788bf83d YS |
718 | return status; |
719 | } | |
720 | ||
721 | /* for non DIQ we need to patch the VMID: */ | |
722 | ||
723 | reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; | |
724 | ||
725 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
726 | ||
727 | pr_debug("\t\t mode is: %u\n", wac_info->mode); | |
728 | pr_debug("\t\t operand is: %u\n", wac_info->operand); | |
729 | pr_debug("\t\t trap id is: %u\n", wac_info->trapId); | |
730 | pr_debug("\t\t msg value is: %u\n", | |
731 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); | |
732 | pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); | |
733 | ||
734 | pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); | |
735 | pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); | |
736 | pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); | |
737 | pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); | |
738 | pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); | |
739 | pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); | |
740 | pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); | |
741 | ||
742 | pr_debug("\t\t ibw is : %u\n", | |
743 | reg_gfx_index.bitfields.instance_broadcast_writes); | |
744 | pr_debug("\t\t ii is : %u\n", | |
745 | reg_gfx_index.bitfields.instance_index); | |
746 | pr_debug("\t\t sebw is : %u\n", | |
747 | reg_gfx_index.bitfields.se_broadcast_writes); | |
748 | pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); | |
749 | pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); | |
750 | pr_debug("\t\t sbw is : %u\n", | |
751 | reg_gfx_index.bitfields.sh_broadcast_writes); | |
752 | ||
753 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
754 | ||
755 | return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, | |
756 | reg_gfx_index.u32All, | |
757 | reg_sq_cmd.u32All); | |
758 | } | |
759 | ||
c3447e81 BG |
760 | int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) |
761 | { | |
762 | int status = 0; | |
763 | unsigned int vmid; | |
56fc40ab | 764 | uint16_t queried_pasid; |
c3447e81 BG |
765 | union SQ_CMD_BITS reg_sq_cmd; |
766 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
767 | struct kfd_process_device *pdd; | |
768 | struct dbg_wave_control_info wac_info; | |
44008d7a YZ |
769 | int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; |
770 | int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; | |
c3447e81 BG |
771 | |
772 | reg_sq_cmd.u32All = 0; | |
773 | status = 0; | |
774 | ||
775 | wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; | |
776 | wac_info.operand = HSA_DBG_WAVEOP_KILL; | |
777 | ||
778 | pr_debug("Killing all process wavefronts\n"); | |
779 | ||
780 | /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. | |
781 | * ATC_VMID15_PASID_MAPPING | |
8eabaf54 KR |
782 | * to check which VMID the current process is mapped to. |
783 | */ | |
c3447e81 BG |
784 | |
785 | for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { | |
56fc40ab YZ |
786 | status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info |
787 | (dev->kgd, vmid, &queried_pasid); | |
788 | ||
789 | if (status && queried_pasid == p->pasid) { | |
790 | pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", | |
791 | vmid, p->pasid); | |
792 | break; | |
c3447e81 BG |
793 | } |
794 | } | |
795 | ||
796 | if (vmid > last_vmid_to_scan) { | |
6027b1bf | 797 | pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); |
c3447e81 BG |
798 | return -EFAULT; |
799 | } | |
800 | ||
801 | /* taking the VMID for that process on the safe way using PDD */ | |
802 | pdd = kfd_get_process_device_data(dev, p); | |
803 | if (!pdd) | |
804 | return -EFAULT; | |
805 | ||
806 | status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, | |
807 | ®_gfx_index); | |
808 | if (status != 0) | |
809 | return -EINVAL; | |
810 | ||
811 | /* for non DIQ we need to patch the VMID: */ | |
812 | reg_sq_cmd.bits.vm_id = vmid; | |
813 | ||
814 | dev->kfd2kgd->wave_control_execute(dev->kgd, | |
815 | reg_gfx_index.u32All, | |
816 | reg_sq_cmd.u32All); | |
817 | ||
818 | return 0; | |
819 | } | |
820 | ||
fbeb661b YS |
821 | void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, |
822 | enum DBGDEV_TYPE type) | |
823 | { | |
fbeb661b YS |
824 | pdbgdev->dev = pdev; |
825 | pdbgdev->kq = NULL; | |
826 | pdbgdev->type = type; | |
827 | pdbgdev->pqm = NULL; | |
828 | ||
829 | switch (type) { | |
830 | case DBGDEV_TYPE_NODIQ: | |
831 | pdbgdev->dbgdev_register = dbgdev_register_nodiq; | |
832 | pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; | |
788bf83d | 833 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; |
e2e9afc4 | 834 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; |
fbeb661b YS |
835 | break; |
836 | case DBGDEV_TYPE_DIQ: | |
837 | default: | |
838 | pdbgdev->dbgdev_register = dbgdev_register_diq; | |
839 | pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; | |
788bf83d | 840 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; |
e2e9afc4 | 841 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; |
fbeb661b YS |
842 | break; |
843 | } | |
844 | ||
845 | } |