drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

   1 // SPDX-License-Identifier: GPL-2.0 OR MIT
   2 /*
   3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include <linux/slab.h>
  26 #include <linux/list.h>
  27 #include "kfd_device_queue_manager.h"
  28 #include "kfd_priv.h"
  29 #include "kfd_kernel_queue.h"
  30 #include "amdgpu_amdkfd.h"
  31 #include "amdgpu_reset.h"
  32
  33 static inline struct process_queue_node *get_queue_by_qid(
  34                         struct process_queue_manager *pqm, unsigned int qid)
  35 {
  36         struct process_queue_node *pqn;
  37
  38         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
  39                 if ((pqn->q && pqn->q->properties.queue_id == qid) ||
  40                     (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
  41                         return pqn;
  42         }
  43
  44         return NULL;
  45 }
  46
  47 static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
  48                                     unsigned int qid)
  49 {
  50         if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
  51                 return -EINVAL;
  52
  53         if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
  54                 pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
  55                 return -ENOSPC;
  56         }
  57
  58         return 0;
  59 }
  60
  61 static int find_available_queue_slot(struct process_queue_manager *pqm,
  62                                         unsigned int *qid)
  63 {
  64         unsigned long found;
  65
  66         found = find_first_zero_bit(pqm->queue_slot_bitmap,
  67                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
  68
  69         pr_debug("The new slot id %lu\n", found);
  70
  71         if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
  72                 pr_info("Cannot open more queues for process with pasid 0x%x\n",
  73                                 pqm->process->pasid);
  74                 return -ENOMEM;
  75         }
  76
  77         set_bit(found, pqm->queue_slot_bitmap);
  78         *qid = found;
  79
  80         return 0;
  81 }
  82
  83 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
  84 {
  85         struct kfd_node *dev = pdd->dev;
  86
  87         if (pdd->already_dequeued)
  88                 return;
  89
  90         dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
  91         if (dev->kfd->shared_resources.enable_mes &&
  92             down_read_trylock(&dev->adev->reset_domain->sem)) {
  93                 amdgpu_mes_flush_shader_debugger(dev->adev,
  94                                                  pdd->proc_ctx_gpu_addr);
  95                 up_read(&dev->adev->reset_domain->sem);
  96         }
  97         pdd->already_dequeued = true;
  98 }
  99
 100 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 101                         void *gws)
 102 {
 103         struct mqd_update_info minfo = {0};
 104         struct kfd_node *dev = NULL;
 105         struct process_queue_node *pqn;
 106         struct kfd_process_device *pdd;
 107         struct kgd_mem *mem = NULL;
 108         int ret;
 109
 110         pqn = get_queue_by_qid(pqm, qid);
 111         if (!pqn) {
 112                 pr_err("Queue id does not match any known queue\n");
 113                 return -EINVAL;
 114         }
 115
 116         if (pqn->q)
 117                 dev = pqn->q->device;
 118         if (WARN_ON(!dev))
 119                 return -ENODEV;
 120
 121         pdd = kfd_get_process_device_data(dev, pqm->process);
 122         if (!pdd) {
 123                 pr_err("Process device data doesn't exist\n");
 124                 return -EINVAL;
 125         }
 126
 127         /* Only allow one queue per process can have GWS assigned */
 128         if (gws && pdd->qpd.num_gws)
 129                 return -EBUSY;
 130
 131         if (!gws && pdd->qpd.num_gws == 0)
 132                 return -EINVAL;
 133
 134         if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
 135             KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
 136             !dev->kfd->shared_resources.enable_mes) {
 137                 if (gws)
 138                         ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
 139                                 gws, &mem);
 140                 else
 141                         ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
 142                                 pqn->q->gws);
 143                 if (unlikely(ret))
 144                         return ret;
 145                 pqn->q->gws = mem;
 146         } else {
 147                 /*
 148                  * Intentionally set GWS to a non-NULL value
 149                  * for devices that do not use GWS for global wave
 150                  * synchronization but require the formality
 151                  * of setting GWS for cooperative groups.
 152                  */
 153                 pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
 154         }
 155
 156         pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
 157         minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
 158
 159         return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
 160                                                         pqn->q, &minfo);
 161 }
 162
 163 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
 164 {
 165         int i;
 166
 167         for (i = 0; i < p->n_pdds; i++)
 168                 kfd_process_dequeue_from_device(p->pdds[i]);
 169 }
 170
 171 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 172 {
 173         INIT_LIST_HEAD(&pqm->queues);
 174         pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 175                                                GFP_KERNEL);
 176         if (!pqm->queue_slot_bitmap)
 177                 return -ENOMEM;
 178         pqm->process = p;
 179
 180         return 0;
 181 }
 182
 183 static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
 184                                      struct process_queue_node *pqn)
 185 {
 186         struct kfd_node *dev;
 187         struct kfd_process_device *pdd;
 188
 189         dev = pqn->q->device;
 190
 191         pdd = kfd_get_process_device_data(dev, pqm->process);
 192         if (!pdd) {
 193                 pr_err("Process device data doesn't exist\n");
 194                 return;
 195         }
 196
 197         if (pqn->q->gws) {
 198                 if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
 199                     KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
 200                     !dev->kfd->shared_resources.enable_mes)
 201                         amdgpu_amdkfd_remove_gws_from_process(
 202                                 pqm->process->kgd_process_info, pqn->q->gws);
 203                 pdd->qpd.num_gws = 0;
 204         }
 205
 206         if (dev->kfd->shared_resources.enable_mes) {
 207                 amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
 208                 amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
 209         }
 210 }
 211
 212 void pqm_uninit(struct process_queue_manager *pqm)
 213 {
 214         struct process_queue_node *pqn, *next;
 215
 216         list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
 217                 if (pqn->q) {
 218                         struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
 219                                                                                      pqm->process);
 220                         if (pdd) {
 221                                 kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
 222                                 kfd_queue_release_buffers(pdd, &pqn->q->properties);
 223                         } else {
 224                                 WARN_ON(!pdd);
 225                         }
 226                         pqm_clean_queue_resource(pqm, pqn);
 227                 }
 228
 229                 kfd_procfs_del_queue(pqn->q);
 230                 uninit_queue(pqn->q);
 231                 list_del(&pqn->process_queue_list);
 232                 kfree(pqn);
 233         }
 234
 235         bitmap_free(pqm->queue_slot_bitmap);
 236         pqm->queue_slot_bitmap = NULL;
 237 }
 238
 239 static int init_user_queue(struct process_queue_manager *pqm,
 240                                 struct kfd_node *dev, struct queue **q,
 241                                 struct queue_properties *q_properties,
 242                                 unsigned int qid)
 243 {
 244         int retval;
 245
 246         /* Doorbell initialized in user space*/
 247         q_properties->doorbell_ptr = NULL;
 248         q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
 249
 250         /* let DQM handle it*/
 251         q_properties->vmid = 0;
 252         q_properties->queue_id = qid;
 253
 254         retval = init_queue(q, q_properties);
 255         if (retval != 0)
 256                 return retval;
 257
 258         (*q)->device = dev;
 259         (*q)->process = pqm->process;
 260
 261         if (dev->kfd->shared_resources.enable_mes) {
 262                 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
 263                                                 AMDGPU_MES_GANG_CTX_SIZE,
 264                                                 &(*q)->gang_ctx_bo,
 265                                                 &(*q)->gang_ctx_gpu_addr,
 266                                                 &(*q)->gang_ctx_cpu_ptr,
 267                                                 false);
 268                 if (retval) {
 269                         pr_err("failed to allocate gang context bo\n");
 270                         goto cleanup;
 271                 }
 272                 memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
 273
 274                 /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
 275                  * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
 276                  */
 277                 if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
 278                     >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
 279                         if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
 280                                 pr_err("Queue memory allocated to wrong device\n");
 281                                 retval = -EINVAL;
 282                                 goto free_gang_ctx_bo;
 283                         }
 284
 285                         retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
 286                                                                   &(*q)->wptr_bo_gart);
 287                         if (retval) {
 288                                 pr_err("Failed to map wptr bo to GART\n");
 289                                 goto free_gang_ctx_bo;
 290                         }
 291                 }
 292         }
 293
 294         pr_debug("PQM After init queue");
 295         return 0;
 296
 297 free_gang_ctx_bo:
 298         amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo);
 299 cleanup:
 300         uninit_queue(*q);
 301         *q = NULL;
 302         return retval;
 303 }
 304
 305 int pqm_create_queue(struct process_queue_manager *pqm,
 306                             struct kfd_node *dev,
 307                             struct queue_properties *properties,
 308                             unsigned int *qid,
 309                             const struct kfd_criu_queue_priv_data *q_data,
 310                             const void *restore_mqd,
 311                             const void *restore_ctl_stack,
 312                             uint32_t *p_doorbell_offset_in_process)
 313 {
 314         int retval;
 315         struct kfd_process_device *pdd;
 316         struct queue *q;
 317         struct process_queue_node *pqn;
 318         struct kernel_queue *kq;
 319         enum kfd_queue_type type = properties->type;
 320         unsigned int max_queues = 127; /* HWS limit */
 321
 322         /*
 323          * On GFX 9.4.3, increase the number of queues that
 324          * can be created to 255. No HWS limit on GFX 9.4.3.
 325          */
 326         if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
 327             KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
 328                 max_queues = 255;
 329
 330         q = NULL;
 331         kq = NULL;
 332
 333         pdd = kfd_get_process_device_data(dev, pqm->process);
 334         if (!pdd) {
 335                 pr_err("Process device data doesn't exist\n");
 336                 return -1;
 337         }
 338
 339         /*
 340          * for debug process, verify that it is within the static queues limit
 341          * currently limit is set to half of the total avail HQD slots
 342          * If we are just about to create DIQ, the is_debug flag is not set yet
 343          * Hence we also check the type as well
 344          */
 345         if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
 346                 max_queues = dev->kfd->device_info.max_no_of_hqd/2;
 347
 348         if (pdd->qpd.queue_count >= max_queues)
 349                 return -ENOSPC;
 350
 351         if (q_data) {
 352                 retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
 353                 *qid = q_data->q_id;
 354         } else
 355                 retval = find_available_queue_slot(pqm, qid);
 356
 357         if (retval != 0)
 358                 return retval;
 359
 360         if (list_empty(&pdd->qpd.queues_list) &&
 361             list_empty(&pdd->qpd.priv_queue_list))
 362                 dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
 363
 364         pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
 365         if (!pqn) {
 366                 retval = -ENOMEM;
 367                 goto err_allocate_pqn;
 368         }
 369
 370         switch (type) {
 371         case KFD_QUEUE_TYPE_SDMA:
 372         case KFD_QUEUE_TYPE_SDMA_XGMI:
 373         case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
 374                 /* SDMA queues are always allocated statically no matter
 375                  * which scheduler mode is used. We also do not need to
 376                  * check whether a SDMA queue can be allocated here, because
 377                  * allocate_sdma_queue() in create_queue() has the
 378                  * corresponding check logic.
 379                  */
 380                 retval = init_user_queue(pqm, dev, &q, properties, *qid);
 381                 if (retval != 0)
 382                         goto err_create_queue;
 383                 pqn->q = q;
 384                 pqn->kq = NULL;
 385                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
 386                                                     restore_mqd, restore_ctl_stack);
 387                 print_queue(q);
 388                 break;
 389
 390         case KFD_QUEUE_TYPE_COMPUTE:
 391                 /* check if there is over subscription */
 392                 if ((dev->dqm->sched_policy ==
 393                      KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
 394                 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
 395                 (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
 396                         pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
 397                         retval = -EPERM;
 398                         goto err_create_queue;
 399                 }
 400
 401                 retval = init_user_queue(pqm, dev, &q, properties, *qid);
 402                 if (retval != 0)
 403                         goto err_create_queue;
 404                 pqn->q = q;
 405                 pqn->kq = NULL;
 406                 retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
 407                                                     restore_mqd, restore_ctl_stack);
 408                 print_queue(q);
 409                 break;
 410         case KFD_QUEUE_TYPE_DIQ:
 411                 kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
 412                 if (!kq) {
 413                         retval = -ENOMEM;
 414                         goto err_create_queue;
 415                 }
 416                 kq->queue->properties.queue_id = *qid;
 417                 pqn->kq = kq;
 418                 pqn->q = NULL;
 419                 retval = kfd_process_drain_interrupts(pdd);
 420                 if (retval)
 421                         break;
 422
 423                 retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
 424                                                         kq, &pdd->qpd);
 425                 break;
 426         default:
 427                 WARN(1, "Invalid queue type %d", type);
 428                 retval = -EINVAL;
 429         }
 430
 431         if (retval != 0) {
 432                 pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
 433                         pqm->process->pasid, type, retval);
 434                 goto err_create_queue;
 435         }
 436
 437         if (q && p_doorbell_offset_in_process) {
 438                 /* Return the doorbell offset within the doorbell page
 439                  * to the caller so it can be passed up to user mode
 440                  * (in bytes).
 441                  * relative doorbell index = Absolute doorbell index -
 442                  * absolute index of first doorbell in the page.
 443                  */
 444                 uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
 445                                                                        pdd->qpd.proc_doorbells,
 446                                                                        0,
 447                                                                        pdd->dev->kfd->device_info.doorbell_size);
 448
 449                 *p_doorbell_offset_in_process = (q->properties.doorbell_off
 450                                                 - first_db_index) * sizeof(uint32_t);
 451         }
 452
 453         pr_debug("PQM After DQM create queue\n");
 454
 455         list_add(&pqn->process_queue_list, &pqm->queues);
 456
 457         if (q) {
 458                 pr_debug("PQM done creating queue\n");
 459                 kfd_procfs_add_queue(q);
 460                 print_queue_properties(&q->properties);
 461         }
 462
 463         return retval;
 464
 465 err_create_queue:
 466         uninit_queue(q);
 467         if (kq)
 468                 kernel_queue_uninit(kq);
 469         kfree(pqn);
 470 err_allocate_pqn:
 471         /* check if queues list is empty unregister process from device */
 472         clear_bit(*qid, pqm->queue_slot_bitmap);
 473         if (list_empty(&pdd->qpd.queues_list) &&
 474             list_empty(&pdd->qpd.priv_queue_list))
 475                 dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
 476         return retval;
 477 }
 478
 479 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 480 {
 481         struct process_queue_node *pqn;
 482         struct kfd_process_device *pdd;
 483         struct device_queue_manager *dqm;
 484         struct kfd_node *dev;
 485         int retval;
 486
 487         dqm = NULL;
 488
 489         retval = 0;
 490
 491         pqn = get_queue_by_qid(pqm, qid);
 492         if (!pqn) {
 493                 pr_err("Queue id does not match any known queue\n");
 494                 return -EINVAL;
 495         }
 496
 497         dev = NULL;
 498         if (pqn->kq)
 499                 dev = pqn->kq->dev;
 500         if (pqn->q)
 501                 dev = pqn->q->device;
 502         if (WARN_ON(!dev))
 503                 return -ENODEV;
 504
 505         pdd = kfd_get_process_device_data(dev, pqm->process);
 506         if (!pdd) {
 507                 pr_err("Process device data doesn't exist\n");
 508                 return -1;
 509         }
 510
 511         if (pqn->kq) {
 512                 /* destroy kernel queue (DIQ) */
 513                 dqm = pqn->kq->dev->dqm;
 514                 dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
 515                 kernel_queue_uninit(pqn->kq);
 516         }
 517
 518         if (pqn->q) {
 519                 retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
 520                 if (retval)
 521                         goto err_destroy_queue;
 522
 523                 dqm = pqn->q->device->dqm;
 524                 retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
 525                 if (retval) {
 526                         pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
 527                                 pqm->process->pasid,
 528                                 pqn->q->properties.queue_id, retval);
 529                         if (retval != -ETIME)
 530                                 goto err_destroy_queue;
 531                 }
 532                 kfd_procfs_del_queue(pqn->q);
 533                 kfd_queue_release_buffers(pdd, &pqn->q->properties);
 534                 pqm_clean_queue_resource(pqm, pqn);
 535                 uninit_queue(pqn->q);
 536         }
 537
 538         list_del(&pqn->process_queue_list);
 539         kfree(pqn);
 540         clear_bit(qid, pqm->queue_slot_bitmap);
 541
 542         if (list_empty(&pdd->qpd.queues_list) &&
 543             list_empty(&pdd->qpd.priv_queue_list))
 544                 dqm->ops.unregister_process(dqm, &pdd->qpd);
 545
 546 err_destroy_queue:
 547         return retval;
 548 }
 549
 550 int pqm_update_queue_properties(struct process_queue_manager *pqm,
 551                                 unsigned int qid, struct queue_properties *p)
 552 {
 553         int retval;
 554         struct process_queue_node *pqn;
 555
 556         pqn = get_queue_by_qid(pqm, qid);
 557         if (!pqn || !pqn->q) {
 558                 pr_debug("No queue %d exists for update operation\n", qid);
 559                 return -EFAULT;
 560         }
 561
 562         /*
 563          * Update with NULL ring address is used to disable the queue
 564          */
 565         if (p->queue_address && p->queue_size) {
 566                 struct kfd_process_device *pdd;
 567                 struct amdgpu_vm *vm;
 568                 struct queue *q = pqn->q;
 569                 int err;
 570
 571                 pdd = kfd_get_process_device_data(q->device, q->process);
 572                 if (!pdd)
 573                         return -ENODEV;
 574                 vm = drm_priv_to_vm(pdd->drm_priv);
 575                 err = amdgpu_bo_reserve(vm->root.bo, false);
 576                 if (err)
 577                         return err;
 578
 579                 if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
 580                                          p->queue_size)) {
 581                         pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
 582                                  p->queue_address, p->queue_size);
 583                         return -EFAULT;
 584                 }
 585
 586                 kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
 587                 kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
 588                 amdgpu_bo_unreserve(vm->root.bo);
 589
 590                 pqn->q->properties.ring_bo = p->ring_bo;
 591         }
 592
 593         pqn->q->properties.queue_address = p->queue_address;
 594         pqn->q->properties.queue_size = p->queue_size;
 595         pqn->q->properties.queue_percent = p->queue_percent;
 596         pqn->q->properties.priority = p->priority;
 597         pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
 598
 599         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
 600                                                         pqn->q, NULL);
 601         if (retval != 0)
 602                 return retval;
 603
 604         return 0;
 605 }
 606
 607 int pqm_update_mqd(struct process_queue_manager *pqm,
 608                                 unsigned int qid, struct mqd_update_info *minfo)
 609 {
 610         int retval;
 611         struct process_queue_node *pqn;
 612
 613         pqn = get_queue_by_qid(pqm, qid);
 614         if (!pqn) {
 615                 pr_debug("No queue %d exists for update operation\n", qid);
 616                 return -EFAULT;
 617         }
 618
 619         /* CUs are masked for debugger requirements so deny user mask  */
 620         if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
 621                 return -EBUSY;
 622
 623         /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
 624         if (minfo && minfo->cu_mask.ptr &&
 625                         KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
 626                 int i;
 627
 628                 for (i = 0; i < minfo->cu_mask.count; i += 2) {
 629                         uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
 630
 631                         if (cu_pair && cu_pair != 0x3) {
 632                                 pr_debug("CUs must be adjacent pairwise enabled.\n");
 633                                 return -EINVAL;
 634                         }
 635                 }
 636         }
 637
 638         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
 639                                                         pqn->q, minfo);
 640         if (retval != 0)
 641                 return retval;
 642
 643         if (minfo && minfo->cu_mask.ptr)
 644                 pqn->q->properties.is_user_cu_masked = true;
 645
 646         return 0;
 647 }
 648
 649 struct kernel_queue *pqm_get_kernel_queue(
 650                                         struct process_queue_manager *pqm,
 651                                         unsigned int qid)
 652 {
 653         struct process_queue_node *pqn;
 654
 655         pqn = get_queue_by_qid(pqm, qid);
 656         if (pqn && pqn->kq)
 657                 return pqn->kq;
 658
 659         return NULL;
 660 }
 661
 662 struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
 663                                         unsigned int qid)
 664 {
 665         struct process_queue_node *pqn;
 666
 667         pqn = get_queue_by_qid(pqm, qid);
 668         return pqn ? pqn->q : NULL;
 669 }
 670
 671 int pqm_get_wave_state(struct process_queue_manager *pqm,
 672                        unsigned int qid,
 673                        void __user *ctl_stack,
 674                        u32 *ctl_stack_used_size,
 675                        u32 *save_area_used_size)
 676 {
 677         struct process_queue_node *pqn;
 678
 679         pqn = get_queue_by_qid(pqm, qid);
 680         if (!pqn) {
 681                 pr_debug("amdkfd: No queue %d exists for operation\n",
 682                          qid);
 683                 return -EFAULT;
 684         }
 685
 686         return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
 687                                                        pqn->q,
 688                                                        ctl_stack,
 689                                                        ctl_stack_used_size,
 690                                                        save_area_used_size);
 691 }
 692
 693 int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
 694                            uint64_t exception_clear_mask,
 695                            void __user *buf,
 696                            int *num_qss_entries,
 697                            uint32_t *entry_size)
 698 {
 699         struct process_queue_node *pqn;
 700         struct kfd_queue_snapshot_entry src;
 701         uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
 702         int r = 0;
 703
 704         *num_qss_entries = 0;
 705         if (!(*entry_size))
 706                 return -EINVAL;
 707
 708         *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
 709         mutex_lock(&pqm->process->event_mutex);
 710
 711         memset(&src, 0, sizeof(src));
 712
 713         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
 714                 if (!pqn->q)
 715                         continue;
 716
 717                 if (*num_qss_entries < tmp_qss_entries) {
 718                         set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
 719
 720                         if (copy_to_user(buf, &src, *entry_size)) {
 721                                 r = -EFAULT;
 722                                 break;
 723                         }
 724                         buf += tmp_entry_size;
 725                 }
 726                 *num_qss_entries += 1;
 727         }
 728
 729         mutex_unlock(&pqm->process->event_mutex);
 730         return r;
 731 }
 732
 733 static int get_queue_data_sizes(struct kfd_process_device *pdd,
 734                                 struct queue *q,
 735                                 uint32_t *mqd_size,
 736                                 uint32_t *ctl_stack_size)
 737 {
 738         int ret;
 739
 740         ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
 741                                             q->properties.queue_id,
 742                                             mqd_size,
 743                                             ctl_stack_size);
 744         if (ret)
 745                 pr_err("Failed to get queue dump info (%d)\n", ret);
 746
 747         return ret;
 748 }
 749
 750 int kfd_process_get_queue_info(struct kfd_process *p,
 751                                uint32_t *num_queues,
 752                                uint64_t *priv_data_sizes)
 753 {
 754         uint32_t extra_data_sizes = 0;
 755         struct queue *q;
 756         int i;
 757         int ret;
 758
 759         *num_queues = 0;
 760
 761         /* Run over all PDDs of the process */
 762         for (i = 0; i < p->n_pdds; i++) {
 763                 struct kfd_process_device *pdd = p->pdds[i];
 764
 765                 list_for_each_entry(q, &pdd->qpd.queues_list, list) {
 766                         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 767                                 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 768                                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 769                                 uint32_t mqd_size, ctl_stack_size;
 770
 771                                 *num_queues = *num_queues + 1;
 772
 773                                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
 774                                 if (ret)
 775                                         return ret;
 776
 777                                 extra_data_sizes += mqd_size + ctl_stack_size;
 778                         } else {
 779                                 pr_err("Unsupported queue type (%d)\n", q->properties.type);
 780                                 return -EOPNOTSUPP;
 781                         }
 782                 }
 783         }
 784         *priv_data_sizes = extra_data_sizes +
 785                                 (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
 786
 787         return 0;
 788 }
 789
 790 static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
 791                               unsigned int qid,
 792                               void *mqd,
 793                               void *ctl_stack)
 794 {
 795         struct process_queue_node *pqn;
 796
 797         pqn = get_queue_by_qid(pqm, qid);
 798         if (!pqn) {
 799                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
 800                 return -EFAULT;
 801         }
 802
 803         if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
 804                 pr_err("amdkfd: queue dumping not supported on this device\n");
 805                 return -EOPNOTSUPP;
 806         }
 807
 808         return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
 809                                                        pqn->q, mqd, ctl_stack);
 810 }
 811
 812 static int criu_checkpoint_queue(struct kfd_process_device *pdd,
 813                            struct queue *q,
 814                            struct kfd_criu_queue_priv_data *q_data)
 815 {
 816         uint8_t *mqd, *ctl_stack;
 817         int ret;
 818
 819         mqd = (void *)(q_data + 1);
 820         ctl_stack = mqd + q_data->mqd_size;
 821
 822         q_data->gpu_id = pdd->user_gpu_id;
 823         q_data->type = q->properties.type;
 824         q_data->format = q->properties.format;
 825         q_data->q_id =  q->properties.queue_id;
 826         q_data->q_address = q->properties.queue_address;
 827         q_data->q_size = q->properties.queue_size;
 828         q_data->priority = q->properties.priority;
 829         q_data->q_percent = q->properties.queue_percent;
 830         q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
 831         q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
 832         q_data->doorbell_id = q->doorbell_id;
 833
 834         q_data->sdma_id = q->sdma_id;
 835
 836         q_data->eop_ring_buffer_address =
 837                 q->properties.eop_ring_buffer_address;
 838
 839         q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
 840
 841         q_data->ctx_save_restore_area_address =
 842                 q->properties.ctx_save_restore_area_address;
 843
 844         q_data->ctx_save_restore_area_size =
 845                 q->properties.ctx_save_restore_area_size;
 846
 847         q_data->gws = !!q->gws;
 848
 849         ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
 850         if (ret) {
 851                 pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
 852                 return ret;
 853         }
 854
 855         pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
 856         return ret;
 857 }
 858
 859 static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
 860                                    uint8_t __user *user_priv,
 861                                    unsigned int *q_index,
 862                                    uint64_t *queues_priv_data_offset)
 863 {
 864         unsigned int q_private_data_size = 0;
 865         uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
 866         struct queue *q;
 867         int ret = 0;
 868
 869         list_for_each_entry(q, &pdd->qpd.queues_list, list) {
 870                 struct kfd_criu_queue_priv_data *q_data;
 871                 uint64_t q_data_size;
 872                 uint32_t mqd_size;
 873                 uint32_t ctl_stack_size;
 874
 875                 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
 876                         q->properties.type != KFD_QUEUE_TYPE_SDMA &&
 877                         q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
 878
 879                         pr_err("Unsupported queue type (%d)\n", q->properties.type);
 880                         ret = -EOPNOTSUPP;
 881                         break;
 882                 }
 883
 884                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
 885                 if (ret)
 886                         break;
 887
 888                 q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
 889
 890                 /* Increase local buffer space if needed */
 891                 if (q_private_data_size < q_data_size) {
 892                         kfree(q_private_data);
 893
 894                         q_private_data = kzalloc(q_data_size, GFP_KERNEL);
 895                         if (!q_private_data) {
 896                                 ret = -ENOMEM;
 897                                 break;
 898                         }
 899                         q_private_data_size = q_data_size;
 900                 }
 901
 902                 q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
 903
 904                 /* data stored in this order: priv_data, mqd, ctl_stack */
 905                 q_data->mqd_size = mqd_size;
 906                 q_data->ctl_stack_size = ctl_stack_size;
 907
 908                 ret = criu_checkpoint_queue(pdd, q, q_data);
 909                 if (ret)
 910                         break;
 911
 912                 q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
 913
 914                 ret = copy_to_user(user_priv + *queues_priv_data_offset,
 915                                 q_data, q_data_size);
 916                 if (ret) {
 917                         ret = -EFAULT;
 918                         break;
 919                 }
 920                 *queues_priv_data_offset += q_data_size;
 921                 *q_index = *q_index + 1;
 922         }
 923
 924         kfree(q_private_data);
 925
 926         return ret;
 927 }
 928
 929 int kfd_criu_checkpoint_queues(struct kfd_process *p,
 930                          uint8_t __user *user_priv_data,
 931                          uint64_t *priv_data_offset)
 932 {
 933         int ret = 0, pdd_index, q_index = 0;
 934
 935         for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
 936                 struct kfd_process_device *pdd = p->pdds[pdd_index];
 937
 938                 /*
 939                  * criu_checkpoint_queues_device will copy data to user and update q_index and
 940                  * queues_priv_data_offset
 941                  */
 942                 ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
 943                                               priv_data_offset);
 944
 945                 if (ret)
 946                         break;
 947         }
 948
 949         return ret;
 950 }
 951
 952 static void set_queue_properties_from_criu(struct queue_properties *qp,
 953                                           struct kfd_criu_queue_priv_data *q_data)
 954 {
 955         qp->is_interop = false;
 956         qp->queue_percent = q_data->q_percent;
 957         qp->priority = q_data->priority;
 958         qp->queue_address = q_data->q_address;
 959         qp->queue_size = q_data->q_size;
 960         qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
 961         qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
 962         qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
 963         qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
 964         qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
 965         qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
 966         qp->ctl_stack_size = q_data->ctl_stack_size;
 967         qp->type = q_data->type;
 968         qp->format = q_data->format;
 969 }
 970
 971 int kfd_criu_restore_queue(struct kfd_process *p,
 972                            uint8_t __user *user_priv_ptr,
 973                            uint64_t *priv_data_offset,
 974                            uint64_t max_priv_data_size)
 975 {
 976         uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
 977         struct kfd_criu_queue_priv_data *q_data;
 978         struct kfd_process_device *pdd;
 979         uint64_t q_extra_data_size;
 980         struct queue_properties qp;
 981         unsigned int queue_id;
 982         int ret = 0;
 983
 984         if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
 985                 return -EINVAL;
 986
 987         q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
 988         if (!q_data)
 989                 return -ENOMEM;
 990
 991         ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
 992         if (ret) {
 993                 ret = -EFAULT;
 994                 goto exit;
 995         }
 996
 997         *priv_data_offset += sizeof(*q_data);
 998         q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
 999
1000         if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
1001                 ret = -EINVAL;
1002                 goto exit;
1003         }
1004
1005         q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
1006         if (!q_extra_data) {
1007                 ret = -ENOMEM;
1008                 goto exit;
1009         }
1010
1011         ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
1012         if (ret) {
1013                 ret = -EFAULT;
1014                 goto exit;
1015         }
1016
1017         *priv_data_offset += q_extra_data_size;
1018
1019         pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
1020         if (!pdd) {
1021                 pr_err("Failed to get pdd\n");
1022                 ret = -EINVAL;
1023                 goto exit;
1024         }
1025
1026         /* data stored in this order: mqd, ctl_stack */
1027         mqd = q_extra_data;
1028         ctl_stack = mqd + q_data->mqd_size;
1029
1030         memset(&qp, 0, sizeof(qp));
1031         set_queue_properties_from_criu(&qp, q_data);
1032
1033         print_queue_properties(&qp);
1034
1035         ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
1036         if (ret) {
1037                 pr_err("Failed to create new queue err:%d\n", ret);
1038                 goto exit;
1039         }
1040
1041         if (q_data->gws)
1042                 ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
1043
1044 exit:
1045         if (ret)
1046                 pr_err("Failed to restore queue (%d)\n", ret);
1047         else
1048                 pr_debug("Queue id %d was restored successfully\n", queue_id);
1049
1050         kfree(q_data);
1051         kfree(q_extra_data);
1052
1053         return ret;
1054 }
1055
1056 int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
1057                                   unsigned int qid,
1058                                   uint32_t *mqd_size,
1059                                   uint32_t *ctl_stack_size)
1060 {
1061         struct process_queue_node *pqn;
1062
1063         pqn = get_queue_by_qid(pqm, qid);
1064         if (!pqn) {
1065                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
1066                 return -EFAULT;
1067         }
1068
1069         if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
1070                 pr_err("amdkfd: queue dumping not supported on this device\n");
1071                 return -EOPNOTSUPP;
1072         }
1073
1074         pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
1075                                                        pqn->q, mqd_size,
1076                                                        ctl_stack_size);
1077         return 0;
1078 }
1079
1080 #if defined(CONFIG_DEBUG_FS)
1081
1082 int pqm_debugfs_mqds(struct seq_file *m, void *data)
1083 {
1084         struct process_queue_manager *pqm = data;
1085         struct process_queue_node *pqn;
1086         struct queue *q;
1087         enum KFD_MQD_TYPE mqd_type;
1088         struct mqd_manager *mqd_mgr;
1089         int r = 0, xcc, num_xccs = 1;
1090         void *mqd;
1091         uint64_t size = 0;
1092
1093         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
1094                 if (pqn->q) {
1095                         q = pqn->q;
1096                         switch (q->properties.type) {
1097                         case KFD_QUEUE_TYPE_SDMA:
1098                         case KFD_QUEUE_TYPE_SDMA_XGMI:
1099                                 seq_printf(m, "  SDMA queue on device %x\n",
1100                                            q->device->id);
1101                                 mqd_type = KFD_MQD_TYPE_SDMA;
1102                                 break;
1103                         case KFD_QUEUE_TYPE_COMPUTE:
1104                                 seq_printf(m, "  Compute queue on device %x\n",
1105                                            q->device->id);
1106                                 mqd_type = KFD_MQD_TYPE_CP;
1107                                 num_xccs = NUM_XCC(q->device->xcc_mask);
1108                                 break;
1109                         default:
1110                                 seq_printf(m,
1111                                 "  Bad user queue type %d on device %x\n",
1112                                            q->properties.type, q->device->id);
1113                                 continue;
1114                         }
1115                         mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
1116                         size = mqd_mgr->mqd_stride(mqd_mgr,
1117                                                         &q->properties);
1118                 } else if (pqn->kq) {
1119                         q = pqn->kq->queue;
1120                         mqd_mgr = pqn->kq->mqd_mgr;
1121                         switch (q->properties.type) {
1122                         case KFD_QUEUE_TYPE_DIQ:
1123                                 seq_printf(m, "  DIQ on device %x\n",
1124                                            pqn->kq->dev->id);
1125                                 break;
1126                         default:
1127                                 seq_printf(m,
1128                                 "  Bad kernel queue type %d on device %x\n",
1129                                            q->properties.type,
1130                                            pqn->kq->dev->id);
1131                                 continue;
1132                         }
1133                 } else {
1134                         seq_printf(m,
1135                 "  Weird: Queue node with neither kernel nor user queue\n");
1136                         continue;
1137                 }
1138
1139                 for (xcc = 0; xcc < num_xccs; xcc++) {
1140                         mqd = q->mqd + size * xcc;
1141                         r = mqd_mgr->debugfs_show_mqd(m, mqd);
1142                         if (r != 0)
1143                                 break;
1144                 }
1145         }
1146
1147         return r;
1148 }
1149
1150 #endif