]> Git Repo - J-linux.git/blob - drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / drivers / gpu / drm / amd / amdkfd / kfd_process_queue_manager.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/slab.h>
26 #include <linux/list.h>
27 #include "kfd_device_queue_manager.h"
28 #include "kfd_priv.h"
29 #include "kfd_kernel_queue.h"
30 #include "amdgpu_amdkfd.h"
31 #include "amdgpu_reset.h"
32
33 static inline struct process_queue_node *get_queue_by_qid(
34                         struct process_queue_manager *pqm, unsigned int qid)
35 {
36         struct process_queue_node *pqn;
37
38         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
39                 if ((pqn->q && pqn->q->properties.queue_id == qid) ||
40                     (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
41                         return pqn;
42         }
43
44         return NULL;
45 }
46
47 static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
48                                     unsigned int qid)
49 {
50         if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
51                 return -EINVAL;
52
53         if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
54                 pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
55                 return -ENOSPC;
56         }
57
58         return 0;
59 }
60
61 static int find_available_queue_slot(struct process_queue_manager *pqm,
62                                         unsigned int *qid)
63 {
64         unsigned long found;
65
66         found = find_first_zero_bit(pqm->queue_slot_bitmap,
67                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
68
69         pr_debug("The new slot id %lu\n", found);
70
71         if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
72                 pr_info("Cannot open more queues for process with pasid 0x%x\n",
73                                 pqm->process->pasid);
74                 return -ENOMEM;
75         }
76
77         set_bit(found, pqm->queue_slot_bitmap);
78         *qid = found;
79
80         return 0;
81 }
82
83 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
84 {
85         struct kfd_node *dev = pdd->dev;
86
87         if (pdd->already_dequeued)
88                 return;
89
90         dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
91         if (dev->kfd->shared_resources.enable_mes &&
92             down_read_trylock(&dev->adev->reset_domain->sem)) {
93                 amdgpu_mes_flush_shader_debugger(dev->adev,
94                                                  pdd->proc_ctx_gpu_addr);
95                 up_read(&dev->adev->reset_domain->sem);
96         }
97         pdd->already_dequeued = true;
98 }
99
100 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
101                         void *gws)
102 {
103         struct mqd_update_info minfo = {0};
104         struct kfd_node *dev = NULL;
105         struct process_queue_node *pqn;
106         struct kfd_process_device *pdd;
107         struct kgd_mem *mem = NULL;
108         int ret;
109
110         pqn = get_queue_by_qid(pqm, qid);
111         if (!pqn) {
112                 pr_err("Queue id does not match any known queue\n");
113                 return -EINVAL;
114         }
115
116         if (pqn->q)
117                 dev = pqn->q->device;
118         if (WARN_ON(!dev))
119                 return -ENODEV;
120
121         pdd = kfd_get_process_device_data(dev, pqm->process);
122         if (!pdd) {
123                 pr_err("Process device data doesn't exist\n");
124                 return -EINVAL;
125         }
126
127         /* Only allow one queue per process can have GWS assigned */
128         if (gws && pdd->qpd.num_gws)
129                 return -EBUSY;
130
131         if (!gws && pdd->qpd.num_gws == 0)
132                 return -EINVAL;
133
134         if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
135             KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
136             !dev->kfd->shared_resources.enable_mes) {
137                 if (gws)
138                         ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
139                                 gws, &mem);
140                 else
141                         ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
142                                 pqn->q->gws);
143                 if (unlikely(ret))
144                         return ret;
145                 pqn->q->gws = mem;
146         } else {
147                 /*
148                  * Intentionally set GWS to a non-NULL value
149                  * for devices that do not use GWS for global wave
150                  * synchronization but require the formality
151                  * of setting GWS for cooperative groups.
152                  */
153                 pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
154         }
155
156         pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
157         minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
158
159         return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
160                                                         pqn->q, &minfo);
161 }
162
163 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
164 {
165         int i;
166
167         for (i = 0; i < p->n_pdds; i++)
168                 kfd_process_dequeue_from_device(p->pdds[i]);
169 }
170
171 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
172 {
173         INIT_LIST_HEAD(&pqm->queues);
174         pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
175                                                GFP_KERNEL);
176         if (!pqm->queue_slot_bitmap)
177                 return -ENOMEM;
178         pqm->process = p;
179
180         return 0;
181 }
182
183 static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
184                                      struct process_queue_node *pqn)
185 {
186         struct kfd_node *dev;
187         struct kfd_process_device *pdd;
188
189         dev = pqn->q->device;
190
191         pdd = kfd_get_process_device_data(dev, pqm->process);
192         if (!pdd) {
193                 pr_err("Process device data doesn't exist\n");
194                 return;
195         }
196
197         if (pqn->q->gws) {
198                 if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
199                     KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
200                     !dev->kfd->shared_resources.enable_mes)
201                         amdgpu_amdkfd_remove_gws_from_process(
202                                 pqm->process->kgd_process_info, pqn->q->gws);
203                 pdd->qpd.num_gws = 0;
204         }
205
206         if (dev->kfd->shared_resources.enable_mes) {
207                 amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
208                 amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
209         }
210 }
211
212 void pqm_uninit(struct process_queue_manager *pqm)
213 {
214         struct process_queue_node *pqn, *next;
215
216         list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
217                 if (pqn->q) {
218                         struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
219                                                                                      pqm->process);
220                         if (pdd) {
221                                 kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
222                                 kfd_queue_release_buffers(pdd, &pqn->q->properties);
223                         } else {
224                                 WARN_ON(!pdd);
225                         }
226                         pqm_clean_queue_resource(pqm, pqn);
227                 }
228
229                 kfd_procfs_del_queue(pqn->q);
230                 uninit_queue(pqn->q);
231                 list_del(&pqn->process_queue_list);
232                 kfree(pqn);
233         }
234
235         bitmap_free(pqm->queue_slot_bitmap);
236         pqm->queue_slot_bitmap = NULL;
237 }
238
239 static int init_user_queue(struct process_queue_manager *pqm,
240                                 struct kfd_node *dev, struct queue **q,
241                                 struct queue_properties *q_properties,
242                                 unsigned int qid)
243 {
244         int retval;
245
246         /* Doorbell initialized in user space*/
247         q_properties->doorbell_ptr = NULL;
248         q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
249
250         /* let DQM handle it*/
251         q_properties->vmid = 0;
252         q_properties->queue_id = qid;
253
254         retval = init_queue(q, q_properties);
255         if (retval != 0)
256                 return retval;
257
258         (*q)->device = dev;
259         (*q)->process = pqm->process;
260
261         if (dev->kfd->shared_resources.enable_mes) {
262                 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
263                                                 AMDGPU_MES_GANG_CTX_SIZE,
264                                                 &(*q)->gang_ctx_bo,
265                                                 &(*q)->gang_ctx_gpu_addr,
266                                                 &(*q)->gang_ctx_cpu_ptr,
267                                                 false);
268                 if (retval) {
269                         pr_err("failed to allocate gang context bo\n");
270                         goto cleanup;
271                 }
272                 memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
273
274                 /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
275                  * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
276                  */
277                 if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
278                     >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
279                         if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
280                                 pr_err("Queue memory allocated to wrong device\n");
281                                 retval = -EINVAL;
282                                 goto free_gang_ctx_bo;
283                         }
284
285                         retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
286                                                                   &(*q)->wptr_bo_gart);
287                         if (retval) {
288                                 pr_err("Failed to map wptr bo to GART\n");
289                                 goto free_gang_ctx_bo;
290                         }
291                 }
292         }
293
294         pr_debug("PQM After init queue");
295         return 0;
296
297 free_gang_ctx_bo:
298         amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo);
299 cleanup:
300         uninit_queue(*q);
301         *q = NULL;
302         return retval;
303 }
304
305 int pqm_create_queue(struct process_queue_manager *pqm,
306                             struct kfd_node *dev,
307                             struct queue_properties *properties,
308                             unsigned int *qid,
309                             const struct kfd_criu_queue_priv_data *q_data,
310                             const void *restore_mqd,
311                             const void *restore_ctl_stack,
312                             uint32_t *p_doorbell_offset_in_process)
313 {
314         int retval;
315         struct kfd_process_device *pdd;
316         struct queue *q;
317         struct process_queue_node *pqn;
318         struct kernel_queue *kq;
319         enum kfd_queue_type type = properties->type;
320         unsigned int max_queues = 127; /* HWS limit */
321
322         /*
323          * On GFX 9.4.3, increase the number of queues that
324          * can be created to 255. No HWS limit on GFX 9.4.3.
325          */
326         if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
327             KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
328                 max_queues = 255;
329
330         q = NULL;
331         kq = NULL;
332
333         pdd = kfd_get_process_device_data(dev, pqm->process);
334         if (!pdd) {
335                 pr_err("Process device data doesn't exist\n");
336                 return -1;
337         }
338
339         /*
340          * for debug process, verify that it is within the static queues limit
341          * currently limit is set to half of the total avail HQD slots
342          * If we are just about to create DIQ, the is_debug flag is not set yet
343          * Hence we also check the type as well
344          */
345         if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
346                 max_queues = dev->kfd->device_info.max_no_of_hqd/2;
347
348         if (pdd->qpd.queue_count >= max_queues)
349                 return -ENOSPC;
350
351         if (q_data) {
352                 retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
353                 *qid = q_data->q_id;
354         } else
355                 retval = find_available_queue_slot(pqm, qid);
356
357         if (retval != 0)
358                 return retval;
359
360         if (list_empty(&pdd->qpd.queues_list) &&
361             list_empty(&pdd->qpd.priv_queue_list))
362                 dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
363
364         pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
365         if (!pqn) {
366                 retval = -ENOMEM;
367                 goto err_allocate_pqn;
368         }
369
370         switch (type) {
371         case KFD_QUEUE_TYPE_SDMA:
372         case KFD_QUEUE_TYPE_SDMA_XGMI:
373         case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
374                 /* SDMA queues are always allocated statically no matter
375                  * which scheduler mode is used. We also do not need to
376                  * check whether a SDMA queue can be allocated here, because
377                  * allocate_sdma_queue() in create_queue() has the
378                  * corresponding check logic.
379                  */
380                 retval = init_user_queue(pqm, dev, &q, properties, *qid);
381                 if (retval != 0)
382                         goto err_create_queue;
383                 pqn->q = q;
384                 pqn->kq = NULL;
385                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
386                                                     restore_mqd, restore_ctl_stack);
387                 print_queue(q);
388                 break;
389
390         case KFD_QUEUE_TYPE_COMPUTE:
391                 /* check if there is over subscription */
392                 if ((dev->dqm->sched_policy ==
393                      KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
394                 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
395                 (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
396                         pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
397                         retval = -EPERM;
398                         goto err_create_queue;
399                 }
400
401                 retval = init_user_queue(pqm, dev, &q, properties, *qid);
402                 if (retval != 0)
403                         goto err_create_queue;
404                 pqn->q = q;
405                 pqn->kq = NULL;
406                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
407                                                     restore_mqd, restore_ctl_stack);
408                 print_queue(q);
409                 break;
410         case KFD_QUEUE_TYPE_DIQ:
411                 kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
412                 if (!kq) {
413                         retval = -ENOMEM;
414                         goto err_create_queue;
415                 }
416                 kq->queue->properties.queue_id = *qid;
417                 pqn->kq = kq;
418                 pqn->q = NULL;
419                 retval = kfd_process_drain_interrupts(pdd);
420                 if (retval)
421                         break;
422
423                 retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
424                                                         kq, &pdd->qpd);
425                 break;
426         default:
427                 WARN(1, "Invalid queue type %d", type);
428                 retval = -EINVAL;
429         }
430
431         if (retval != 0) {
432                 pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
433                         pqm->process->pasid, type, retval);
434                 goto err_create_queue;
435         }
436
437         if (q && p_doorbell_offset_in_process) {
438                 /* Return the doorbell offset within the doorbell page
439                  * to the caller so it can be passed up to user mode
440                  * (in bytes).
441                  * relative doorbell index = Absolute doorbell index -
442                  * absolute index of first doorbell in the page.
443                  */
444                 uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
445                                                                        pdd->qpd.proc_doorbells,
446                                                                        0,
447                                                                        pdd->dev->kfd->device_info.doorbell_size);
448
449                 *p_doorbell_offset_in_process = (q->properties.doorbell_off
450                                                 - first_db_index) * sizeof(uint32_t);
451         }
452
453         pr_debug("PQM After DQM create queue\n");
454
455         list_add(&pqn->process_queue_list, &pqm->queues);
456
457         if (q) {
458                 pr_debug("PQM done creating queue\n");
459                 kfd_procfs_add_queue(q);
460                 print_queue_properties(&q->properties);
461         }
462
463         return retval;
464
465 err_create_queue:
466         uninit_queue(q);
467         if (kq)
468                 kernel_queue_uninit(kq);
469         kfree(pqn);
470 err_allocate_pqn:
471         /* check if queues list is empty unregister process from device */
472         clear_bit(*qid, pqm->queue_slot_bitmap);
473         if (list_empty(&pdd->qpd.queues_list) &&
474             list_empty(&pdd->qpd.priv_queue_list))
475                 dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
476         return retval;
477 }
478
479 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
480 {
481         struct process_queue_node *pqn;
482         struct kfd_process_device *pdd;
483         struct device_queue_manager *dqm;
484         struct kfd_node *dev;
485         int retval;
486
487         dqm = NULL;
488
489         retval = 0;
490
491         pqn = get_queue_by_qid(pqm, qid);
492         if (!pqn) {
493                 pr_err("Queue id does not match any known queue\n");
494                 return -EINVAL;
495         }
496
497         dev = NULL;
498         if (pqn->kq)
499                 dev = pqn->kq->dev;
500         if (pqn->q)
501                 dev = pqn->q->device;
502         if (WARN_ON(!dev))
503                 return -ENODEV;
504
505         pdd = kfd_get_process_device_data(dev, pqm->process);
506         if (!pdd) {
507                 pr_err("Process device data doesn't exist\n");
508                 return -1;
509         }
510
511         if (pqn->kq) {
512                 /* destroy kernel queue (DIQ) */
513                 dqm = pqn->kq->dev->dqm;
514                 dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
515                 kernel_queue_uninit(pqn->kq);
516         }
517
518         if (pqn->q) {
519                 retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
520                 if (retval)
521                         goto err_destroy_queue;
522
523                 dqm = pqn->q->device->dqm;
524                 retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
525                 if (retval) {
526                         pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
527                                 pqm->process->pasid,
528                                 pqn->q->properties.queue_id, retval);
529                         if (retval != -ETIME)
530                                 goto err_destroy_queue;
531                 }
532                 kfd_procfs_del_queue(pqn->q);
533                 kfd_queue_release_buffers(pdd, &pqn->q->properties);
534                 pqm_clean_queue_resource(pqm, pqn);
535                 uninit_queue(pqn->q);
536         }
537
538         list_del(&pqn->process_queue_list);
539         kfree(pqn);
540         clear_bit(qid, pqm->queue_slot_bitmap);
541
542         if (list_empty(&pdd->qpd.queues_list) &&
543             list_empty(&pdd->qpd.priv_queue_list))
544                 dqm->ops.unregister_process(dqm, &pdd->qpd);
545
546 err_destroy_queue:
547         return retval;
548 }
549
550 int pqm_update_queue_properties(struct process_queue_manager *pqm,
551                                 unsigned int qid, struct queue_properties *p)
552 {
553         int retval;
554         struct process_queue_node *pqn;
555
556         pqn = get_queue_by_qid(pqm, qid);
557         if (!pqn || !pqn->q) {
558                 pr_debug("No queue %d exists for update operation\n", qid);
559                 return -EFAULT;
560         }
561
562         /*
563          * Update with NULL ring address is used to disable the queue
564          */
565         if (p->queue_address && p->queue_size) {
566                 struct kfd_process_device *pdd;
567                 struct amdgpu_vm *vm;
568                 struct queue *q = pqn->q;
569                 int err;
570
571                 pdd = kfd_get_process_device_data(q->device, q->process);
572                 if (!pdd)
573                         return -ENODEV;
574                 vm = drm_priv_to_vm(pdd->drm_priv);
575                 err = amdgpu_bo_reserve(vm->root.bo, false);
576                 if (err)
577                         return err;
578
579                 if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
580                                          p->queue_size)) {
581                         pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
582                                  p->queue_address, p->queue_size);
583                         return -EFAULT;
584                 }
585
586                 kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
587                 kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
588                 amdgpu_bo_unreserve(vm->root.bo);
589
590                 pqn->q->properties.ring_bo = p->ring_bo;
591         }
592
593         pqn->q->properties.queue_address = p->queue_address;
594         pqn->q->properties.queue_size = p->queue_size;
595         pqn->q->properties.queue_percent = p->queue_percent;
596         pqn->q->properties.priority = p->priority;
597         pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
598
599         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
600                                                         pqn->q, NULL);
601         if (retval != 0)
602                 return retval;
603
604         return 0;
605 }
606
607 int pqm_update_mqd(struct process_queue_manager *pqm,
608                                 unsigned int qid, struct mqd_update_info *minfo)
609 {
610         int retval;
611         struct process_queue_node *pqn;
612
613         pqn = get_queue_by_qid(pqm, qid);
614         if (!pqn) {
615                 pr_debug("No queue %d exists for update operation\n", qid);
616                 return -EFAULT;
617         }
618
619         /* CUs are masked for debugger requirements so deny user mask  */
620         if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
621                 return -EBUSY;
622
623         /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
624         if (minfo && minfo->cu_mask.ptr &&
625                         KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
626                 int i;
627
628                 for (i = 0; i < minfo->cu_mask.count; i += 2) {
629                         uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
630
631                         if (cu_pair && cu_pair != 0x3) {
632                                 pr_debug("CUs must be adjacent pairwise enabled.\n");
633                                 return -EINVAL;
634                         }
635                 }
636         }
637
638         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
639                                                         pqn->q, minfo);
640         if (retval != 0)
641                 return retval;
642
643         if (minfo && minfo->cu_mask.ptr)
644                 pqn->q->properties.is_user_cu_masked = true;
645
646         return 0;
647 }
648
649 struct kernel_queue *pqm_get_kernel_queue(
650                                         struct process_queue_manager *pqm,
651                                         unsigned int qid)
652 {
653         struct process_queue_node *pqn;
654
655         pqn = get_queue_by_qid(pqm, qid);
656         if (pqn && pqn->kq)
657                 return pqn->kq;
658
659         return NULL;
660 }
661
662 struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
663                                         unsigned int qid)
664 {
665         struct process_queue_node *pqn;
666
667         pqn = get_queue_by_qid(pqm, qid);
668         return pqn ? pqn->q : NULL;
669 }
670
671 int pqm_get_wave_state(struct process_queue_manager *pqm,
672                        unsigned int qid,
673                        void __user *ctl_stack,
674                        u32 *ctl_stack_used_size,
675                        u32 *save_area_used_size)
676 {
677         struct process_queue_node *pqn;
678
679         pqn = get_queue_by_qid(pqm, qid);
680         if (!pqn) {
681                 pr_debug("amdkfd: No queue %d exists for operation\n",
682                          qid);
683                 return -EFAULT;
684         }
685
686         return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
687                                                        pqn->q,
688                                                        ctl_stack,
689                                                        ctl_stack_used_size,
690                                                        save_area_used_size);
691 }
692
693 int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
694                            uint64_t exception_clear_mask,
695                            void __user *buf,
696                            int *num_qss_entries,
697                            uint32_t *entry_size)
698 {
699         struct process_queue_node *pqn;
700         struct kfd_queue_snapshot_entry src;
701         uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
702         int r = 0;
703
704         *num_qss_entries = 0;
705         if (!(*entry_size))
706                 return -EINVAL;
707
708         *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
709         mutex_lock(&pqm->process->event_mutex);
710
711         memset(&src, 0, sizeof(src));
712
713         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
714                 if (!pqn->q)
715                         continue;
716
717                 if (*num_qss_entries < tmp_qss_entries) {
718                         set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
719
720                         if (copy_to_user(buf, &src, *entry_size)) {
721                                 r = -EFAULT;
722                                 break;
723                         }
724                         buf += tmp_entry_size;
725                 }
726                 *num_qss_entries += 1;
727         }
728
729         mutex_unlock(&pqm->process->event_mutex);
730         return r;
731 }
732
733 static int get_queue_data_sizes(struct kfd_process_device *pdd,
734                                 struct queue *q,
735                                 uint32_t *mqd_size,
736                                 uint32_t *ctl_stack_size)
737 {
738         int ret;
739
740         ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
741                                             q->properties.queue_id,
742                                             mqd_size,
743                                             ctl_stack_size);
744         if (ret)
745                 pr_err("Failed to get queue dump info (%d)\n", ret);
746
747         return ret;
748 }
749
750 int kfd_process_get_queue_info(struct kfd_process *p,
751                                uint32_t *num_queues,
752                                uint64_t *priv_data_sizes)
753 {
754         uint32_t extra_data_sizes = 0;
755         struct queue *q;
756         int i;
757         int ret;
758
759         *num_queues = 0;
760
761         /* Run over all PDDs of the process */
762         for (i = 0; i < p->n_pdds; i++) {
763                 struct kfd_process_device *pdd = p->pdds[i];
764
765                 list_for_each_entry(q, &pdd->qpd.queues_list, list) {
766                         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
767                                 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
768                                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
769                                 uint32_t mqd_size, ctl_stack_size;
770
771                                 *num_queues = *num_queues + 1;
772
773                                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
774                                 if (ret)
775                                         return ret;
776
777                                 extra_data_sizes += mqd_size + ctl_stack_size;
778                         } else {
779                                 pr_err("Unsupported queue type (%d)\n", q->properties.type);
780                                 return -EOPNOTSUPP;
781                         }
782                 }
783         }
784         *priv_data_sizes = extra_data_sizes +
785                                 (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
786
787         return 0;
788 }
789
790 static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
791                               unsigned int qid,
792                               void *mqd,
793                               void *ctl_stack)
794 {
795         struct process_queue_node *pqn;
796
797         pqn = get_queue_by_qid(pqm, qid);
798         if (!pqn) {
799                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
800                 return -EFAULT;
801         }
802
803         if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
804                 pr_err("amdkfd: queue dumping not supported on this device\n");
805                 return -EOPNOTSUPP;
806         }
807
808         return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
809                                                        pqn->q, mqd, ctl_stack);
810 }
811
812 static int criu_checkpoint_queue(struct kfd_process_device *pdd,
813                            struct queue *q,
814                            struct kfd_criu_queue_priv_data *q_data)
815 {
816         uint8_t *mqd, *ctl_stack;
817         int ret;
818
819         mqd = (void *)(q_data + 1);
820         ctl_stack = mqd + q_data->mqd_size;
821
822         q_data->gpu_id = pdd->user_gpu_id;
823         q_data->type = q->properties.type;
824         q_data->format = q->properties.format;
825         q_data->q_id =  q->properties.queue_id;
826         q_data->q_address = q->properties.queue_address;
827         q_data->q_size = q->properties.queue_size;
828         q_data->priority = q->properties.priority;
829         q_data->q_percent = q->properties.queue_percent;
830         q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
831         q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
832         q_data->doorbell_id = q->doorbell_id;
833
834         q_data->sdma_id = q->sdma_id;
835
836         q_data->eop_ring_buffer_address =
837                 q->properties.eop_ring_buffer_address;
838
839         q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
840
841         q_data->ctx_save_restore_area_address =
842                 q->properties.ctx_save_restore_area_address;
843
844         q_data->ctx_save_restore_area_size =
845                 q->properties.ctx_save_restore_area_size;
846
847         q_data->gws = !!q->gws;
848
849         ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
850         if (ret) {
851                 pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
852                 return ret;
853         }
854
855         pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
856         return ret;
857 }
858
859 static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
860                                    uint8_t __user *user_priv,
861                                    unsigned int *q_index,
862                                    uint64_t *queues_priv_data_offset)
863 {
864         unsigned int q_private_data_size = 0;
865         uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
866         struct queue *q;
867         int ret = 0;
868
869         list_for_each_entry(q, &pdd->qpd.queues_list, list) {
870                 struct kfd_criu_queue_priv_data *q_data;
871                 uint64_t q_data_size;
872                 uint32_t mqd_size;
873                 uint32_t ctl_stack_size;
874
875                 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
876                         q->properties.type != KFD_QUEUE_TYPE_SDMA &&
877                         q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
878
879                         pr_err("Unsupported queue type (%d)\n", q->properties.type);
880                         ret = -EOPNOTSUPP;
881                         break;
882                 }
883
884                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
885                 if (ret)
886                         break;
887
888                 q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
889
890                 /* Increase local buffer space if needed */
891                 if (q_private_data_size < q_data_size) {
892                         kfree(q_private_data);
893
894                         q_private_data = kzalloc(q_data_size, GFP_KERNEL);
895                         if (!q_private_data) {
896                                 ret = -ENOMEM;
897                                 break;
898                         }
899                         q_private_data_size = q_data_size;
900                 }
901
902                 q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
903
904                 /* data stored in this order: priv_data, mqd, ctl_stack */
905                 q_data->mqd_size = mqd_size;
906                 q_data->ctl_stack_size = ctl_stack_size;
907
908                 ret = criu_checkpoint_queue(pdd, q, q_data);
909                 if (ret)
910                         break;
911
912                 q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
913
914                 ret = copy_to_user(user_priv + *queues_priv_data_offset,
915                                 q_data, q_data_size);
916                 if (ret) {
917                         ret = -EFAULT;
918                         break;
919                 }
920                 *queues_priv_data_offset += q_data_size;
921                 *q_index = *q_index + 1;
922         }
923
924         kfree(q_private_data);
925
926         return ret;
927 }
928
929 int kfd_criu_checkpoint_queues(struct kfd_process *p,
930                          uint8_t __user *user_priv_data,
931                          uint64_t *priv_data_offset)
932 {
933         int ret = 0, pdd_index, q_index = 0;
934
935         for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
936                 struct kfd_process_device *pdd = p->pdds[pdd_index];
937
938                 /*
939                  * criu_checkpoint_queues_device will copy data to user and update q_index and
940                  * queues_priv_data_offset
941                  */
942                 ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
943                                               priv_data_offset);
944
945                 if (ret)
946                         break;
947         }
948
949         return ret;
950 }
951
952 static void set_queue_properties_from_criu(struct queue_properties *qp,
953                                           struct kfd_criu_queue_priv_data *q_data)
954 {
955         qp->is_interop = false;
956         qp->queue_percent = q_data->q_percent;
957         qp->priority = q_data->priority;
958         qp->queue_address = q_data->q_address;
959         qp->queue_size = q_data->q_size;
960         qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
961         qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
962         qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
963         qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
964         qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
965         qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
966         qp->ctl_stack_size = q_data->ctl_stack_size;
967         qp->type = q_data->type;
968         qp->format = q_data->format;
969 }
970
971 int kfd_criu_restore_queue(struct kfd_process *p,
972                            uint8_t __user *user_priv_ptr,
973                            uint64_t *priv_data_offset,
974                            uint64_t max_priv_data_size)
975 {
976         uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
977         struct kfd_criu_queue_priv_data *q_data;
978         struct kfd_process_device *pdd;
979         uint64_t q_extra_data_size;
980         struct queue_properties qp;
981         unsigned int queue_id;
982         int ret = 0;
983
984         if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
985                 return -EINVAL;
986
987         q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
988         if (!q_data)
989                 return -ENOMEM;
990
991         ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
992         if (ret) {
993                 ret = -EFAULT;
994                 goto exit;
995         }
996
997         *priv_data_offset += sizeof(*q_data);
998         q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
999
1000         if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
1001                 ret = -EINVAL;
1002                 goto exit;
1003         }
1004
1005         q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
1006         if (!q_extra_data) {
1007                 ret = -ENOMEM;
1008                 goto exit;
1009         }
1010
1011         ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
1012         if (ret) {
1013                 ret = -EFAULT;
1014                 goto exit;
1015         }
1016
1017         *priv_data_offset += q_extra_data_size;
1018
1019         pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
1020         if (!pdd) {
1021                 pr_err("Failed to get pdd\n");
1022                 ret = -EINVAL;
1023                 goto exit;
1024         }
1025
1026         /* data stored in this order: mqd, ctl_stack */
1027         mqd = q_extra_data;
1028         ctl_stack = mqd + q_data->mqd_size;
1029
1030         memset(&qp, 0, sizeof(qp));
1031         set_queue_properties_from_criu(&qp, q_data);
1032
1033         print_queue_properties(&qp);
1034
1035         ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
1036         if (ret) {
1037                 pr_err("Failed to create new queue err:%d\n", ret);
1038                 goto exit;
1039         }
1040
1041         if (q_data->gws)
1042                 ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
1043
1044 exit:
1045         if (ret)
1046                 pr_err("Failed to restore queue (%d)\n", ret);
1047         else
1048                 pr_debug("Queue id %d was restored successfully\n", queue_id);
1049
1050         kfree(q_data);
1051         kfree(q_extra_data);
1052
1053         return ret;
1054 }
1055
1056 int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
1057                                   unsigned int qid,
1058                                   uint32_t *mqd_size,
1059                                   uint32_t *ctl_stack_size)
1060 {
1061         struct process_queue_node *pqn;
1062
1063         pqn = get_queue_by_qid(pqm, qid);
1064         if (!pqn) {
1065                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
1066                 return -EFAULT;
1067         }
1068
1069         if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
1070                 pr_err("amdkfd: queue dumping not supported on this device\n");
1071                 return -EOPNOTSUPP;
1072         }
1073
1074         pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
1075                                                        pqn->q, mqd_size,
1076                                                        ctl_stack_size);
1077         return 0;
1078 }
1079
1080 #if defined(CONFIG_DEBUG_FS)
1081
1082 int pqm_debugfs_mqds(struct seq_file *m, void *data)
1083 {
1084         struct process_queue_manager *pqm = data;
1085         struct process_queue_node *pqn;
1086         struct queue *q;
1087         enum KFD_MQD_TYPE mqd_type;
1088         struct mqd_manager *mqd_mgr;
1089         int r = 0, xcc, num_xccs = 1;
1090         void *mqd;
1091         uint64_t size = 0;
1092
1093         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
1094                 if (pqn->q) {
1095                         q = pqn->q;
1096                         switch (q->properties.type) {
1097                         case KFD_QUEUE_TYPE_SDMA:
1098                         case KFD_QUEUE_TYPE_SDMA_XGMI:
1099                                 seq_printf(m, "  SDMA queue on device %x\n",
1100                                            q->device->id);
1101                                 mqd_type = KFD_MQD_TYPE_SDMA;
1102                                 break;
1103                         case KFD_QUEUE_TYPE_COMPUTE:
1104                                 seq_printf(m, "  Compute queue on device %x\n",
1105                                            q->device->id);
1106                                 mqd_type = KFD_MQD_TYPE_CP;
1107                                 num_xccs = NUM_XCC(q->device->xcc_mask);
1108                                 break;
1109                         default:
1110                                 seq_printf(m,
1111                                 "  Bad user queue type %d on device %x\n",
1112                                            q->properties.type, q->device->id);
1113                                 continue;
1114                         }
1115                         mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
1116                         size = mqd_mgr->mqd_stride(mqd_mgr,
1117                                                         &q->properties);
1118                 } else if (pqn->kq) {
1119                         q = pqn->kq->queue;
1120                         mqd_mgr = pqn->kq->mqd_mgr;
1121                         switch (q->properties.type) {
1122                         case KFD_QUEUE_TYPE_DIQ:
1123                                 seq_printf(m, "  DIQ on device %x\n",
1124                                            pqn->kq->dev->id);
1125                                 break;
1126                         default:
1127                                 seq_printf(m,
1128                                 "  Bad kernel queue type %d on device %x\n",
1129                                            q->properties.type,
1130                                            pqn->kq->dev->id);
1131                                 continue;
1132                         }
1133                 } else {
1134                         seq_printf(m,
1135                 "  Weird: Queue node with neither kernel nor user queue\n");
1136                         continue;
1137                 }
1138
1139                 for (xcc = 0; xcc < num_xccs; xcc++) {
1140                         mqd = q->mqd + size * xcc;
1141                         r = mqd_mgr->debugfs_show_mqd(m, mqd);
1142                         if (r != 0)
1143                                 break;
1144                 }
1145         }
1146
1147         return r;
1148 }
1149
1150 #endif
This page took 0.101689 seconds and 4 git commands to generate.