[linux.git] drivers/gpu/drm/amd/amdkfd/kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/mmu_context.h>
29 #include <linux/slab.h>
30 #include <linux/amd-iommu.h>
31 #include <linux/notifier.h>
32 #include <linux/compat.h>
33 #include <linux/mman.h>
34 #include <linux/file.h>
35 #include <linux/pm_runtime.h>
36 #include "amdgpu_amdkfd.h"
37 #include "amdgpu.h"
38
39 struct mm_struct;
40
41 #include "kfd_priv.h"
42 #include "kfd_device_queue_manager.h"
43 #include "kfd_dbgmgr.h"
44 #include "kfd_iommu.h"
45
46 /*
47  * List of struct kfd_process (field kfd_processes).
48  * Unique/indexed by mm_struct*
49  */
50 DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
51 static DEFINE_MUTEX(kfd_processes_mutex);
52
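/* SRCU protects lockless traversal of kfd_processes_table (see
 * find_process_by_mm()); updates are serialized by kfd_processes_mutex.
 */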
53 DEFINE_SRCU(kfd_processes_srcu);
54
55 /* For process termination handling */
56 static struct workqueue_struct *kfd_process_wq;
57
58 /* Ordered, single-threaded workqueue for restoring evicted
59  * processes. Restoring multiple processes concurrently under memory
60  * pressure can lead to processes blocking each other from validating
61  * their BOs and result in a live-lock situation where processes
62  * remain evicted indefinitely.
63  */
64 static struct workqueue_struct *kfd_restore_wq;
65
66 static struct kfd_process *find_process(const struct task_struct *thread);
67 static void kfd_process_ref_release(struct kref *ref);
68 static struct kfd_process *create_process(const struct task_struct *thread);
69 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
70
71 static void evict_process_worker(struct work_struct *work);
72 static void restore_process_worker(struct work_struct *work);
73
74 struct kfd_procfs_tree {
75         struct kobject *kobj;
76 };
77
78 static struct kfd_procfs_tree procfs;
79
80 /*
81  * Structure for SDMA activity tracking
82  */
83 struct kfd_sdma_activity_handler_workarea {
84         struct work_struct sdma_activity_work;
85         struct kfd_process_device *pdd;
86         uint64_t sdma_activity_counter;
87 };
88
89 struct temp_sdma_queue_list {
90         uint64_t __user *rptr;
91         uint64_t sdma_val;
92         unsigned int queue_id;
93         struct list_head list;
94 };
95
96 static void kfd_sdma_activity_worker(struct work_struct *work)
97 {
98         struct kfd_sdma_activity_handler_workarea *workarea;
99         struct kfd_process_device *pdd;
100         uint64_t val;
101         struct mm_struct *mm;
102         struct queue *q;
103         struct qcm_process_device *qpd;
104         struct device_queue_manager *dqm;
105         int ret = 0;
106         struct temp_sdma_queue_list sdma_q_list;
107         struct temp_sdma_queue_list *sdma_q, *next;
108
109         workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
110                                 sdma_activity_work);
111         if (!workarea)
112                 return;
113
114         pdd = workarea->pdd;
115         if (!pdd)
116                 return;
117         dqm = pdd->dev->dqm;
118         qpd = &pdd->qpd;
119         if (!dqm || !qpd)
120                 return;
121         /*
122          * Total SDMA activity is current SDMA activity + past SDMA activity
123          * Past SDMA count is stored in pdd.
124          * To get the current activity counters for all active SDMA queues,
125          * we loop over all SDMA queues and get their counts from user-space.
126          *
127          * We cannot call get_user() with dqm_lock held as it can cause
128          * a circular lock dependency situation. To read the SDMA stats,
129          * we need to do the following:
130          *
131          * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
132          *    with dqm_lock/dqm_unlock().
133          * 2. Call get_user() for each node in temporary list without dqm_lock.
134          *    Save the SDMA count for each node and also add the count to the total
135          *    SDMA count.
136          *    It is possible that, during this step, a few SDMA queue nodes got deleted
137          *    from the qpd->queues_list.
138          * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
139          *    If any node got deleted, its SDMA count would be captured in the sdma
140          *    past activity counter. So subtract the SDMA counter stored in step 2
141          *    for this node from the total SDMA count.
142          */
143         INIT_LIST_HEAD(&sdma_q_list.list);
144
145         /*
146          * Create the temp list of all SDMA queues
147          */
148         dqm_lock(dqm);
149
150         list_for_each_entry(q, &qpd->queues_list, list) {
151                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
152                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
153                         continue;
154
155                 sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
156                 if (!sdma_q) {
157                         dqm_unlock(dqm);
158                         goto cleanup;
159                 }
160
161                 INIT_LIST_HEAD(&sdma_q->list);
162                 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
163                 sdma_q->queue_id = q->properties.queue_id;
164                 list_add_tail(&sdma_q->list, &sdma_q_list.list);
165         }
166
167         /*
168          * If the temp list is empty, then no SDMA queue nodes were found in
169          * qpd->queues_list. Return the past activity count as the total SDMA
170          * count.
171          */
172         if (list_empty(&sdma_q_list.list)) {
173                 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
174                 dqm_unlock(dqm);
175                 return;
176         }
177
178         dqm_unlock(dqm);
179
180         /*
181          * Get the usage count for each SDMA queue in temp_list.
182          */
183         mm = get_task_mm(pdd->process->lead_thread);
184         if (!mm)
185                 goto cleanup;
186
187         kthread_use_mm(mm);
188
189         list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
190                 val = 0;
191                 ret = read_sdma_queue_counter(sdma_q->rptr, &val);
192                 if (ret) {
193                         pr_debug("Failed to read SDMA queue active counter for queue id: %d",
194                                  sdma_q->queue_id);
195                 } else {
196                         sdma_q->sdma_val = val;
197                         workarea->sdma_activity_counter += val;
198                 }
199         }
200
201         kthread_unuse_mm(mm);
202         mmput(mm);
203
204         /*
205          * Do a second iteration over qpd->queues_list to check if any SDMA
206          * nodes got deleted while fetching SDMA counter.
207          */
208         dqm_lock(dqm);
209
210         workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
211
212         list_for_each_entry(q, &qpd->queues_list, list) {
213                 if (list_empty(&sdma_q_list.list))
214                         break;
215
216                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
217                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
218                         continue;
219
220                 list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
221                         if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
222                              (sdma_q->queue_id == q->properties.queue_id)) {
223                                 list_del(&sdma_q->list);
224                                 kfree(sdma_q);
225                                 break;
226                         }
227                 }
228         }
229
230         dqm_unlock(dqm);
231
232         /*
233          * If temp list is not empty, it implies some queues got deleted
234          * from qpd->queues_list during SDMA usage read. Subtract the SDMA
235          * count for each node from the total SDMA count.
236          */
237         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
238                 workarea->sdma_activity_counter -= sdma_q->sdma_val;
239                 list_del(&sdma_q->list);
240                 kfree(sdma_q);
241         }
242
243         return;
244
245 cleanup:
246         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
247                 list_del(&sdma_q->list);
248                 kfree(sdma_q);
249         }
250 }
251
252 /**
253  * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
254  * by the current process. Translates the acquired wave count into the number
255  * of compute units that are occupied.
256  *
257  * @attr: Handle of the attribute that allows reporting of wave count. The
258  * attribute handle encapsulates the GPU device it is associated with, thereby
259  * allowing collection of waves in flight, etc.
260  *
261  * @buffer: Handle of user provided buffer updated with wave count
262  *
263  * Return: Number of bytes written to user buffer or an error value
264  */
265 static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
266 {
267         int cu_cnt;
268         int wave_cnt;
269         int max_waves_per_cu;
270         struct kfd_dev *dev = NULL;
271         struct kfd_process *proc = NULL;
272         struct kfd_process_device *pdd = NULL;
273
274         pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
275         dev = pdd->dev;
276         if (dev->kfd2kgd->get_cu_occupancy == NULL)
277                 return -EINVAL;
278
279         cu_cnt = 0;
280         proc = pdd->process;
281         if (pdd->qpd.queue_count == 0) {
282                 pr_debug("Gpu-Id: %d has no active queues for process %d\n",
283                          dev->id, proc->pasid);
284                 return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
285         }
286
287         /* Collect wave count from the device if it supports the feature */
288         wave_cnt = 0;
289         max_waves_per_cu = 0;
290         dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
291                         &max_waves_per_cu);
292
293         /* Translate wave count to number of compute units */
294         cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
295         return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
296 }
297
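/* sysfs show handler for the per-process attributes: "pasid" reports the
 * process PASID, "vram_<gpuid>" the per-device VRAM usage, and "sdma_<gpuid>"
 * schedules kfd_sdma_activity_worker() and waits for it before reporting the
 * accumulated SDMA activity.
 */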
298 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
299                                char *buffer)
300 {
301         if (strcmp(attr->name, "pasid") == 0) {
302                 struct kfd_process *p = container_of(attr, struct kfd_process,
303                                                      attr_pasid);
304
305                 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
306         } else if (strncmp(attr->name, "vram_", 5) == 0) {
307                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
308                                                               attr_vram);
309                 return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
310         } else if (strncmp(attr->name, "sdma_", 5) == 0) {
311                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
312                                                               attr_sdma);
313                 struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
314
315                 INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
316                                         kfd_sdma_activity_worker);
317
318                 sdma_activity_work_handler.pdd = pdd;
319                 sdma_activity_work_handler.sdma_activity_counter = 0;
320
321                 schedule_work(&sdma_activity_work_handler.sdma_activity_work);
322
323                 flush_work(&sdma_activity_work_handler.sdma_activity_work);
324
325                 return snprintf(buffer, PAGE_SIZE, "%llu\n",
326                                 (sdma_activity_work_handler.sdma_activity_counter)/
327                                  SDMA_ACTIVITY_DIVISOR);
328         } else {
329                 pr_err("Invalid attribute");
330                 return -EINVAL;
331         }
332
333         return 0;
334 }
335
336 static void kfd_procfs_kobj_release(struct kobject *kobj)
337 {
338         kfree(kobj);
339 }
340
341 static const struct sysfs_ops kfd_procfs_ops = {
342         .show = kfd_procfs_show,
343 };
344
345 static struct kobj_type procfs_type = {
346         .release = kfd_procfs_kobj_release,
347         .sysfs_ops = &kfd_procfs_ops,
348 };
349
350 void kfd_procfs_init(void)
351 {
352         int ret = 0;
353
354         procfs.kobj = kfd_alloc_struct(procfs.kobj);
355         if (!procfs.kobj)
356                 return;
357
358         ret = kobject_init_and_add(procfs.kobj, &procfs_type,
359                                    &kfd_device->kobj, "proc");
360         if (ret) {
361                 pr_warn("Could not create procfs proc folder");
362                 /* If we fail to create the procfs, clean up */
363                 kfd_procfs_shutdown();
364         }
365 }
366
367 void kfd_procfs_shutdown(void)
368 {
369         if (procfs.kobj) {
370                 kobject_del(procfs.kobj);
371                 kobject_put(procfs.kobj);
372                 procfs.kobj = NULL;
373         }
374 }
375
376 static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
377                                      struct attribute *attr, char *buffer)
378 {
379         struct queue *q = container_of(kobj, struct queue, kobj);
380
381         if (!strcmp(attr->name, "size"))
382                 return snprintf(buffer, PAGE_SIZE, "%llu",
383                                 q->properties.queue_size);
384         else if (!strcmp(attr->name, "type"))
385                 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
386         else if (!strcmp(attr->name, "gpuid"))
387                 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
388         else
389                 pr_err("Invalid attribute");
390
391         return 0;
392 }
393
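/* sysfs show handler for the per-device stats_<gpuid> attributes:
 * "evicted_ms" reports the accumulated eviction time in milliseconds and
 * "cu_occupancy" the number of compute units currently occupied.
 */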
394 static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
395                                      struct attribute *attr, char *buffer)
396 {
397         if (strcmp(attr->name, "evicted_ms") == 0) {
398                 struct kfd_process_device *pdd = container_of(attr,
399                                 struct kfd_process_device,
400                                 attr_evict);
401                 uint64_t evict_jiffies;
402
403                 evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
404
405                 return snprintf(buffer,
406                                 PAGE_SIZE,
407                                 "%llu\n",
408                                 jiffies64_to_msecs(evict_jiffies));
409
410         /* Sysfs handle that gets CU occupancy is per device */
411         } else if (strcmp(attr->name, "cu_occupancy") == 0) {
412                 return kfd_get_cu_occupancy(attr, buffer);
413         } else {
414                 pr_err("Invalid attribute");
415         }
416
417         return 0;
418 }
419
420 static struct attribute attr_queue_size = {
421         .name = "size",
422         .mode = KFD_SYSFS_FILE_MODE
423 };
424
425 static struct attribute attr_queue_type = {
426         .name = "type",
427         .mode = KFD_SYSFS_FILE_MODE
428 };
429
430 static struct attribute attr_queue_gpuid = {
431         .name = "gpuid",
432         .mode = KFD_SYSFS_FILE_MODE
433 };
434
435 static struct attribute *procfs_queue_attrs[] = {
436         &attr_queue_size,
437         &attr_queue_type,
438         &attr_queue_gpuid,
439         NULL
440 };
441
442 static const struct sysfs_ops procfs_queue_ops = {
443         .show = kfd_procfs_queue_show,
444 };
445
446 static struct kobj_type procfs_queue_type = {
447         .sysfs_ops = &procfs_queue_ops,
448         .default_attrs = procfs_queue_attrs,
449 };
450
451 static const struct sysfs_ops procfs_stats_ops = {
452         .show = kfd_procfs_stats_show,
453 };
454
455 static struct attribute *procfs_stats_attrs[] = {
456         NULL
457 };
458
459 static struct kobj_type procfs_stats_type = {
460         .sysfs_ops = &procfs_stats_ops,
461         .default_attrs = procfs_stats_attrs,
462 };
463
464 int kfd_procfs_add_queue(struct queue *q)
465 {
466         struct kfd_process *proc;
467         int ret;
468
469         if (!q || !q->process)
470                 return -EINVAL;
471         proc = q->process;
472
473         /* Create proc/<pid>/queues/<queue id> folder */
474         if (!proc->kobj_queues)
475                 return -EFAULT;
476         ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
477                         proc->kobj_queues, "%u", q->properties.queue_id);
478         if (ret < 0) {
479                 pr_warn("Creating proc/<pid>/queues/%u failed",
480                         q->properties.queue_id);
481                 kobject_put(&q->kobj);
482                 return ret;
483         }
484
485         return 0;
486 }
487
488 static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
489                                  char *name)
490 {
491         int ret = 0;
492
493         if (!p || !attr || !name)
494                 return -EINVAL;
495
496         attr->name = name;
497         attr->mode = KFD_SYSFS_FILE_MODE;
498         sysfs_attr_init(attr);
499
500         ret = sysfs_create_file(p->kobj, attr);
501
502         return ret;
503 }
504
505 static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
506 {
507         int ret = 0;
508         int i;
509         char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
510
511         if (!p)
512                 return -EINVAL;
513
514         if (!p->kobj)
515                 return -EFAULT;
516
517         /*
518          * Create sysfs files for each GPU:
519          * - proc/<pid>/stats_<gpuid>/
520          * - proc/<pid>/stats_<gpuid>/evicted_ms
521          * - proc/<pid>/stats_<gpuid>/cu_occupancy
522          */
523         for (i = 0; i < p->n_pdds; i++) {
524                 struct kfd_process_device *pdd = p->pdds[i];
525                 struct kobject *kobj_stats;
526
527                 snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
528                                 "stats_%u", pdd->dev->id);
529                 kobj_stats = kfd_alloc_struct(kobj_stats);
530                 if (!kobj_stats)
531                         return -ENOMEM;
532
533                 ret = kobject_init_and_add(kobj_stats,
534                                                 &procfs_stats_type,
535                                                 p->kobj,
536                                                 stats_dir_filename);
537
538                 if (ret) {
539                         pr_warn("Creating KFD proc/stats_%s folder failed",
540                                         stats_dir_filename);
541                         kobject_put(kobj_stats);
542                         goto err;
543                 }
544
545                 pdd->kobj_stats = kobj_stats;
546                 pdd->attr_evict.name = "evicted_ms";
547                 pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
548                 sysfs_attr_init(&pdd->attr_evict);
549                 ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
550                 if (ret)
551                         pr_warn("Creating eviction stats for gpuid %d failed",
552                                         (int)pdd->dev->id);
553
554                 /* Add sysfs file to report compute unit occupancy */
555                 if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
556                         pdd->attr_cu_occupancy.name = "cu_occupancy";
557                         pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
558                         sysfs_attr_init(&pdd->attr_cu_occupancy);
559                         ret = sysfs_create_file(kobj_stats,
560                                                 &pdd->attr_cu_occupancy);
561                         if (ret)
562                                 pr_warn("Creating %s failed for gpuid: %d",
563                                         pdd->attr_cu_occupancy.name,
564                                         (int)pdd->dev->id);
565                 }
566         }
567 err:
568         return ret;
569 }
570
571
572 static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
573 {
574         int ret = 0;
575         int i;
576
577         if (!p)
578                 return -EINVAL;
579
580         if (!p->kobj)
581                 return -EFAULT;
582
583         /*
584          * Create sysfs files for each GPU:
585          * - proc/<pid>/vram_<gpuid>
586          * - proc/<pid>/sdma_<gpuid>
587          */
588         for (i = 0; i < p->n_pdds; i++) {
589                 struct kfd_process_device *pdd = p->pdds[i];
590
591                 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
592                          pdd->dev->id);
593                 ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
594                 if (ret)
595                         pr_warn("Creating vram usage for gpu id %d failed",
596                                 (int)pdd->dev->id);
597
598                 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
599                          pdd->dev->id);
600                 ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
601                 if (ret)
602                         pr_warn("Creating sdma usage for gpu id %d failed",
603                                 (int)pdd->dev->id);
604         }
605
606         return ret;
607 }
608
609 void kfd_procfs_del_queue(struct queue *q)
610 {
611         if (!q)
612                 return;
613
614         kobject_del(&q->kobj);
615         kobject_put(&q->kobj);
616 }
617
618 int kfd_process_create_wq(void)
619 {
620         if (!kfd_process_wq)
621                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
622         if (!kfd_restore_wq)
623                 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
624
625         if (!kfd_process_wq || !kfd_restore_wq) {
626                 kfd_process_destroy_wq();
627                 return -ENOMEM;
628         }
629
630         return 0;
631 }
632
633 void kfd_process_destroy_wq(void)
634 {
635         if (kfd_process_wq) {
636                 destroy_workqueue(kfd_process_wq);
637                 kfd_process_wq = NULL;
638         }
639         if (kfd_restore_wq) {
640                 destroy_workqueue(kfd_restore_wq);
641                 kfd_restore_wq = NULL;
642         }
643 }
644
645 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
646                         struct kfd_process_device *pdd)
647 {
648         struct kfd_dev *dev = pdd->dev;
649
650         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
651         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
652 }
653
654 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
655  *      This function should only be called right after the process
656  *      is created, while kfd_processes_mutex is still being held,
657  *      to avoid concurrency. Because of that exclusiveness, we do
658  *      not need to take p->mutex.
659  */
660 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
661                                    uint64_t gpu_va, uint32_t size,
662                                    uint32_t flags, void **kptr)
663 {
664         struct kfd_dev *kdev = pdd->dev;
665         struct kgd_mem *mem = NULL;
666         int handle;
667         int err;
668
669         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
670                                                  pdd->vm, &mem, NULL, flags);
671         if (err)
672                 goto err_alloc_mem;
673
674         err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
675         if (err)
676                 goto err_map_mem;
677
678         err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
679         if (err) {
680                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
681                 goto sync_memory_failed;
682         }
683
684         /* Create an obj handle so kfd_process_device_remove_obj_handle
685          * will take care of the bo removal when the process finishes.
686          * We do not need to take p->mutex, because the process is just
687          * created and the ioctls have not had the chance to run.
688          */
689         handle = kfd_process_device_create_obj_handle(pdd, mem);
690
691         if (handle < 0) {
692                 err = handle;
693                 goto free_gpuvm;
694         }
695
696         if (kptr) {
697                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
698                                 (struct kgd_mem *)mem, kptr, NULL);
699                 if (err) {
700                         pr_debug("Map GTT BO to kernel failed\n");
701                         goto free_obj_handle;
702                 }
703         }
704
705         return err;
706
707 free_obj_handle:
708         kfd_process_device_remove_obj_handle(pdd, handle);
709 free_gpuvm:
710 sync_memory_failed:
711         kfd_process_free_gpuvm(mem, pdd);
712         return err;
713
714 err_map_mem:
715         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
716 err_alloc_mem:
717         *kptr = NULL;
718         return err;
719 }
720
721 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
722  *      process for IB usage. The memory reserved is for KFD to submit
723  *      IBs to AMDGPU from the kernel.  If the memory is reserved
724  *      successfully, ib_kaddr will have the CPU/kernel
725  *      address. Check ib_kaddr before accessing the memory.
726  */
727 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
728 {
729         struct qcm_process_device *qpd = &pdd->qpd;
730         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
731                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
732                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
733                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
734         void *kaddr;
735         int ret;
736
737         if (qpd->ib_kaddr || !qpd->ib_base)
738                 return 0;
739
740         /* ib_base is only set for dGPU */
741         ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
742                                       &kaddr);
743         if (ret)
744                 return ret;
745
746         qpd->ib_kaddr = kaddr;
747
748         return 0;
749 }
750
751 struct kfd_process *kfd_create_process(struct file *filep)
752 {
753         struct kfd_process *process;
754         struct task_struct *thread = current;
755         int ret;
756
757         if (!thread->mm)
758                 return ERR_PTR(-EINVAL);
759
760         /* Only the pthreads threading model is supported. */
761         if (thread->group_leader->mm != thread->mm)
762                 return ERR_PTR(-EINVAL);
763
764         /*
765          * Take the kfd_processes_mutex before starting process creation
766          * so there is no window where two threads of the same process
767          * could create two kfd_process structures.
768          */
769         mutex_lock(&kfd_processes_mutex);
770
771         /* A prior open of /dev/kfd could have already created the process. */
772         process = find_process(thread);
773         if (process) {
774                 pr_debug("Process already found\n");
775         } else {
776                 process = create_process(thread);
777                 if (IS_ERR(process))
778                         goto out;
779
780                 ret = kfd_process_init_cwsr_apu(process, filep);
781                 if (ret)
782                         goto out_destroy;
783
784                 if (!procfs.kobj)
785                         goto out;
786
787                 process->kobj = kfd_alloc_struct(process->kobj);
788                 if (!process->kobj) {
789                         pr_warn("Creating procfs kobject failed");
790                         goto out;
791                 }
792                 ret = kobject_init_and_add(process->kobj, &procfs_type,
793                                            procfs.kobj, "%d",
794                                            (int)process->lead_thread->pid);
795                 if (ret) {
796                         pr_warn("Creating procfs pid directory failed");
797                         kobject_put(process->kobj);
798                         goto out;
799                 }
800
801                 process->attr_pasid.name = "pasid";
802                 process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
803                 sysfs_attr_init(&process->attr_pasid);
804                 ret = sysfs_create_file(process->kobj, &process->attr_pasid);
805                 if (ret)
806                         pr_warn("Creating pasid for pid %d failed",
807                                         (int)process->lead_thread->pid);
808
809                 process->kobj_queues = kobject_create_and_add("queues",
810                                                         process->kobj);
811                 if (!process->kobj_queues)
812                         pr_warn("Creating KFD proc/queues folder failed");
813
814                 ret = kfd_procfs_add_sysfs_stats(process);
815                 if (ret)
816                         pr_warn("Creating sysfs stats dir for pid %d failed",
817                                 (int)process->lead_thread->pid);
818
819                 ret = kfd_procfs_add_sysfs_files(process);
820                 if (ret)
821                         pr_warn("Creating sysfs usage file for pid %d failed",
822                                 (int)process->lead_thread->pid);
823         }
824 out:
825         if (!IS_ERR(process))
826                 kref_get(&process->ref);
827         mutex_unlock(&kfd_processes_mutex);
828
829         return process;
830
831 out_destroy:
832         hash_del_rcu(&process->kfd_processes);
833         mutex_unlock(&kfd_processes_mutex);
834         synchronize_srcu(&kfd_processes_srcu);
835         /* kfd_process_free_notifier will trigger the cleanup */
836         mmu_notifier_put(&process->mmu_notifier);
837         return ERR_PTR(ret);
838 }
839
840 struct kfd_process *kfd_get_process(const struct task_struct *thread)
841 {
842         struct kfd_process *process;
843
844         if (!thread->mm)
845                 return ERR_PTR(-EINVAL);
846
847         /* Only the pthreads threading model is supported. */
848         if (thread->group_leader->mm != thread->mm)
849                 return ERR_PTR(-EINVAL);
850
851         process = find_process(thread);
852         if (!process)
853                 return ERR_PTR(-EINVAL);
854
855         return process;
856 }
857
858 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
859 {
860         struct kfd_process *process;
861
862         hash_for_each_possible_rcu(kfd_processes_table, process,
863                                         kfd_processes, (uintptr_t)mm)
864                 if (process->mm == mm)
865                         return process;
866
867         return NULL;
868 }
869
870 static struct kfd_process *find_process(const struct task_struct *thread)
871 {
872         struct kfd_process *p;
873         int idx;
874
875         idx = srcu_read_lock(&kfd_processes_srcu);
876         p = find_process_by_mm(thread->mm);
877         srcu_read_unlock(&kfd_processes_srcu, idx);
878
879         return p;
880 }
881
882 void kfd_unref_process(struct kfd_process *p)
883 {
884         kref_put(&p->ref, kfd_process_ref_release);
885 }
886
887
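/* Free every BO tracked in the pdd's allocation idr: unmap it from all
 * device VMs it is mapped to, free it, and drop its handle.
 */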
888 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
889 {
890         struct kfd_process *p = pdd->process;
891         void *mem;
892         int id;
893         int i;
894
895         /*
896          * Remove all handles from the idr and release the corresponding
897          * local memory objects
898          */
899         idr_for_each_entry(&pdd->alloc_idr, mem, id) {
900
901                 for (i = 0; i < p->n_pdds; i++) {
902                         struct kfd_process_device *peer_pdd = p->pdds[i];
903
904                         if (!peer_pdd->vm)
905                                 continue;
906                         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
907                                 peer_pdd->dev->kgd, mem, peer_pdd->vm);
908                 }
909
910                 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
911                 kfd_process_device_remove_obj_handle(pdd, id);
912         }
913 }
914
915 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
916 {
917         int i;
918
919         for (i = 0; i < p->n_pdds; i++)
920                 kfd_process_device_free_bos(p->pdds[i]);
921 }
922
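/* Release all per-device process data: the DRM-acquired VM, CWSR pages
 * allocated for APUs, the doorbell bitmap and doorbells, the allocation idr,
 * and the runtime-PM reference taken when the device was bound.
 */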
923 static void kfd_process_destroy_pdds(struct kfd_process *p)
924 {
925         int i;
926
927         for (i = 0; i < p->n_pdds; i++) {
928                 struct kfd_process_device *pdd = p->pdds[i];
929
930                 pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
931                                 pdd->dev->id, p->pasid);
932
933                 if (pdd->drm_file) {
934                         amdgpu_amdkfd_gpuvm_release_process_vm(
935                                         pdd->dev->kgd, pdd->vm);
936                         fput(pdd->drm_file);
937                 }
938
939                 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
940                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
941                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
942
943                 kfree(pdd->qpd.doorbell_bitmap);
944                 idr_destroy(&pdd->alloc_idr);
945
946                 kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
947
948                 /*
949                  * before destroying pdd, make sure to report availability
950                  * for auto suspend
951                  */
952                 if (pdd->runtime_inuse) {
953                         pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
954                         pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
955                         pdd->runtime_inuse = false;
956                 }
957
958                 kfree(pdd);
959                 p->pdds[i] = NULL;
960         }
961         p->n_pdds = 0;
962 }
963
964 /* No process locking is needed in this function, because the process
965  * is not findable any more. We must assume that no other thread is
966  * using it any more, otherwise we couldn't safely free the process
967  * structure in the end.
968  */
969 static void kfd_process_wq_release(struct work_struct *work)
970 {
971         struct kfd_process *p = container_of(work, struct kfd_process,
972                                              release_work);
973         int i;
974
975         /* Remove the procfs files */
976         if (p->kobj) {
977                 sysfs_remove_file(p->kobj, &p->attr_pasid);
978                 kobject_del(p->kobj_queues);
979                 kobject_put(p->kobj_queues);
980                 p->kobj_queues = NULL;
981
982                 for (i = 0; i < p->n_pdds; i++) {
983                         struct kfd_process_device *pdd = p->pdds[i];
984
985                         sysfs_remove_file(p->kobj, &pdd->attr_vram);
986                         sysfs_remove_file(p->kobj, &pdd->attr_sdma);
987                         sysfs_remove_file(p->kobj, &pdd->attr_evict);
988                         if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
989                                 sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
990                         kobject_del(pdd->kobj_stats);
991                         kobject_put(pdd->kobj_stats);
992                         pdd->kobj_stats = NULL;
993                 }
994
995                 kobject_del(p->kobj);
996                 kobject_put(p->kobj);
997                 p->kobj = NULL;
998         }
999
1000         kfd_iommu_unbind_process(p);
1001
1002         kfd_process_free_outstanding_kfd_bos(p);
1003
1004         kfd_process_destroy_pdds(p);
1005         dma_fence_put(p->ef);
1006
1007         kfd_event_free_process(p);
1008
1009         kfd_pasid_free(p->pasid);
1010         mutex_destroy(&p->mutex);
1011
1012         put_task_struct(p->lead_thread);
1013
1014         kfree(p);
1015 }
1016
1017 static void kfd_process_ref_release(struct kref *ref)
1018 {
1019         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
1020
1021         INIT_WORK(&p->release_work, kfd_process_wq_release);
1022         queue_work(kfd_process_wq, &p->release_work);
1023 }
1024
1025 static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
1026 {
1027         int idx = srcu_read_lock(&kfd_processes_srcu);
1028         struct kfd_process *p = find_process_by_mm(mm);
1029
1030         srcu_read_unlock(&kfd_processes_srcu, idx);
1031
1032         return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
1033 }
1034
1035 static void kfd_process_free_notifier(struct mmu_notifier *mn)
1036 {
1037         kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
1038 }
1039
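/* MMU notifier release callback, invoked when the process address space is
 * torn down: unhash the process, cancel eviction/restore work, destroy its
 * queues, signal the eviction fence, and drop the notifier reference.
 */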
1040 static void kfd_process_notifier_release(struct mmu_notifier *mn,
1041                                         struct mm_struct *mm)
1042 {
1043         struct kfd_process *p;
1044         int i;
1045
1046         /*
1047          * The kfd_process structure cannot be freed because the
1048          * mmu_notifier SRCU is read-locked
1049          */
1050         p = container_of(mn, struct kfd_process, mmu_notifier);
1051         if (WARN_ON(p->mm != mm))
1052                 return;
1053
1054         mutex_lock(&kfd_processes_mutex);
1055         hash_del_rcu(&p->kfd_processes);
1056         mutex_unlock(&kfd_processes_mutex);
1057         synchronize_srcu(&kfd_processes_srcu);
1058
1059         cancel_delayed_work_sync(&p->eviction_work);
1060         cancel_delayed_work_sync(&p->restore_work);
1061
1062         mutex_lock(&p->mutex);
1063
1064         /* Iterate over all process device data structures; if a
1065          * pdd is in debug mode, force unregistration first so that
1066          * the queues can then be destroyed.
1067          */
1068         for (i = 0; i < p->n_pdds; i++) {
1069                 struct kfd_dev *dev = p->pdds[i]->dev;
1070
1071                 mutex_lock(kfd_get_dbgmgr_mutex());
1072                 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
1073                         if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
1074                                 kfd_dbgmgr_destroy(dev->dbgmgr);
1075                                 dev->dbgmgr = NULL;
1076                         }
1077                 }
1078                 mutex_unlock(kfd_get_dbgmgr_mutex());
1079         }
1080
1081         kfd_process_dequeue_from_all_devices(p);
1082         pqm_uninit(&p->pqm);
1083
1084         /* Indicate to other users that MM is no longer valid */
1085         p->mm = NULL;
1086         /* Signal the eviction fence after user mode queues are
1087          * destroyed. This allows any BOs to be freed without
1088          * triggering pointless evictions or waiting for fences.
1089          */
1090         dma_fence_signal(p->ef);
1091
1092         mutex_unlock(&p->mutex);
1093
1094         mmu_notifier_put(&p->mmu_notifier);
1095 }
1096
1097 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
1098         .release = kfd_process_notifier_release,
1099         .alloc_notifier = kfd_process_alloc_notifier,
1100         .free_notifier = kfd_process_free_notifier,
1101 };
1102
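/* For devices without a dGPU CWSR base, map the CWSR trap handler area into
 * the user address space via the reserved-memory mmap and copy the trap
 * handler ISA into it.
 */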
1103 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
1104 {
1105         unsigned long  offset;
1106         int i;
1107
1108         for (i = 0; i < p->n_pdds; i++) {
1109                 struct kfd_dev *dev = p->pdds[i]->dev;
1110                 struct qcm_process_device *qpd = &p->pdds[i]->qpd;
1111
1112                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
1113                         continue;
1114
1115                 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
1116                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
1117                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
1118                         MAP_SHARED, offset);
1119
1120                 if (IS_ERR_VALUE(qpd->tba_addr)) {
1121                         int err = qpd->tba_addr;
1122
1123                         pr_err("Failure to set tba address. error %d.\n", err);
1124                         qpd->tba_addr = 0;
1125                         qpd->cwsr_kaddr = NULL;
1126                         return err;
1127                 }
1128
1129                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1130
1131                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1132                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1133                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1134         }
1135
1136         return 0;
1137 }
1138
1139 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
1140 {
1141         struct kfd_dev *dev = pdd->dev;
1142         struct qcm_process_device *qpd = &pdd->qpd;
1143         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1144                         | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1145                         | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1146         void *kaddr;
1147         int ret;
1148
1149         if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
1150                 return 0;
1151
1152         /* cwsr_base is only set for dGPU */
1153         ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
1154                                       KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
1155         if (ret)
1156                 return ret;
1157
1158         qpd->cwsr_kaddr = kaddr;
1159         qpd->tba_addr = qpd->cwsr_base;
1160
1161         memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1162
1163         qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1164         pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1165                  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1166
1167         return 0;
1168 }
1169
1170 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
1171                                   uint64_t tba_addr,
1172                                   uint64_t tma_addr)
1173 {
1174         if (qpd->cwsr_kaddr) {
1175                 /* KFD trap handler is bound, record as second-level TBA/TMA
1176                  * in first-level TMA. First-level trap will jump to second.
1177                  */
1178                 uint64_t *tma =
1179                         (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1180                 tma[0] = tba_addr;
1181                 tma[1] = tma_addr;
1182         } else {
1183                 /* No trap handler bound, bind as first-level TBA/TMA. */
1184                 qpd->tba_addr = tba_addr;
1185                 qpd->tma_addr = tma_addr;
1186         }
1187 }
1188
1189 /*
1190  * On return the kfd_process is fully operational and will be freed when the
1191  * mm is released
1192  */
1193 static struct kfd_process *create_process(const struct task_struct *thread)
1194 {
1195         struct kfd_process *process;
1196         struct mmu_notifier *mn;
1197         int err = -ENOMEM;
1198
1199         process = kzalloc(sizeof(*process), GFP_KERNEL);
1200         if (!process)
1201                 goto err_alloc_process;
1202
1203         kref_init(&process->ref);
1204         mutex_init(&process->mutex);
1205         process->mm = thread->mm;
1206         process->lead_thread = thread->group_leader;
1207         process->n_pdds = 0;
1208         INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1209         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1210         process->last_restore_timestamp = get_jiffies_64();
1211         kfd_event_init_process(process);
1212         process->is_32bit_user_mode = in_compat_syscall();
1213
1214         process->pasid = kfd_pasid_alloc();
1215         if (process->pasid == 0)
1216                 goto err_alloc_pasid;
1217
1218         err = pqm_init(&process->pqm, process);
1219         if (err != 0)
1220                 goto err_process_pqm_init;
1221
1222         /* Init process apertures */
1223         err = kfd_init_apertures(process);
1224         if (err != 0)
1225                 goto err_init_apertures;
1226
1227         /* alloc_notifier needs to find the process in the hash table */
1228         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1229                         (uintptr_t)process->mm);
1230
1231         /* MMU notifier registration must be the last call that can fail
1232          * because after this point we cannot unwind the process creation.
1233          * After this point, mmu_notifier_put will trigger the cleanup by
1234          * dropping the last process reference in the free_notifier.
1235          */
1236         mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
1237         if (IS_ERR(mn)) {
1238                 err = PTR_ERR(mn);
1239                 goto err_register_notifier;
1240         }
1241         BUG_ON(mn != &process->mmu_notifier);
1242
1243         get_task_struct(process->lead_thread);
1244
1245         return process;
1246
1247 err_register_notifier:
1248         hash_del_rcu(&process->kfd_processes);
1249         kfd_process_free_outstanding_kfd_bos(process);
1250         kfd_process_destroy_pdds(process);
1251 err_init_apertures:
1252         pqm_uninit(&process->pqm);
1253 err_process_pqm_init:
1254         kfd_pasid_free(process->pasid);
1255 err_alloc_pasid:
1256         mutex_destroy(&process->mutex);
1257         kfree(process);
1258 err_alloc_process:
1259         return ERR_PTR(err);
1260 }
1261
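/* On SOC15 ASICs, mark the doorbells reserved for SDMA/IH/VCN, and their
 * mirrored range at KFD_QUEUE_DOORBELL_MIRROR_OFFSET, as unavailable in the
 * per-process doorbell bitmap.
 */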
1262 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
1263                         struct kfd_dev *dev)
1264 {
1265         unsigned int i;
1266         int range_start = dev->shared_resources.non_cp_doorbells_start;
1267         int range_end = dev->shared_resources.non_cp_doorbells_end;
1268
1269         if (!KFD_IS_SOC15(dev->device_info->asic_family))
1270                 return 0;
1271
1272         qpd->doorbell_bitmap =
1273                 kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
1274                                      BITS_PER_BYTE), GFP_KERNEL);
1275         if (!qpd->doorbell_bitmap)
1276                 return -ENOMEM;
1277
1278         /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
1279         pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
1280         pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
1281                         range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1282                         range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
1283
1284         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
1285                 if (i >= range_start && i <= range_end) {
1286                         set_bit(i, qpd->doorbell_bitmap);
1287                         set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1288                                 qpd->doorbell_bitmap);
1289                 }
1290         }
1291
1292         return 0;
1293 }
1294
1295 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
1296                                                         struct kfd_process *p)
1297 {
1298         int i;
1299
1300         for (i = 0; i < p->n_pdds; i++)
1301                 if (p->pdds[i]->dev == dev)
1302                         return p->pdds[i];
1303
1304         return NULL;
1305 }
1306
1307 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
1308                                                         struct kfd_process *p)
1309 {
1310         struct kfd_process_device *pdd = NULL;
1311
1312         if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
1313                 return NULL;
1314         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
1315         if (!pdd)
1316                 return NULL;
1317
1318         if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
1319                 pr_err("Failed to alloc doorbell for pdd\n");
1320                 goto err_free_pdd;
1321         }
1322
1323         if (init_doorbell_bitmap(&pdd->qpd, dev)) {
1324                 pr_err("Failed to init doorbell for process\n");
1325                 goto err_free_pdd;
1326         }
1327
1328         pdd->dev = dev;
1329         INIT_LIST_HEAD(&pdd->qpd.queues_list);
1330         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
1331         pdd->qpd.dqm = dev->dqm;
1332         pdd->qpd.pqm = &p->pqm;
1333         pdd->qpd.evicted = 0;
1334         pdd->qpd.mapped_gws_queue = false;
1335         pdd->process = p;
1336         pdd->bound = PDD_UNBOUND;
1337         pdd->already_dequeued = false;
1338         pdd->runtime_inuse = false;
1339         pdd->vram_usage = 0;
1340         pdd->sdma_past_activity_counter = 0;
1341         atomic64_set(&pdd->evict_duration_counter, 0);
1342         p->pdds[p->n_pdds++] = pdd;
1343
1344         /* Init idr used for memory handle translation */
1345         idr_init(&pdd->alloc_idr);
1346
1347         return pdd;
1348
1349 err_free_pdd:
1350         kfree(pdd);
1351         return NULL;
1352 }
1353
1354 /**
1355  * kfd_process_device_init_vm - Initialize a VM for a process-device
1356  *
1357  * @pdd: The process-device
1358  * @drm_file: Optional pointer to a DRM file descriptor
1359  *
1360  * If @drm_file is specified, it will be used to acquire the VM from
1361  * that file descriptor. If successful, the @pdd takes ownership of
1362  * the file descriptor.
1363  *
1364  * If @drm_file is NULL, a new VM is created.
1365  *
1366  * Returns 0 on success, -errno on failure.
1367  */
1368 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
1369                                struct file *drm_file)
1370 {
1371         struct kfd_process *p;
1372         struct kfd_dev *dev;
1373         int ret;
1374
1375         if (!drm_file)
1376                 return -EINVAL;
1377
1378         if (pdd->vm)
1379                 return -EBUSY;
1380
1381         p = pdd->process;
1382         dev = pdd->dev;
1383
1384         ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
1385                 dev->kgd, drm_file, p->pasid,
1386                 &pdd->vm, &p->kgd_process_info, &p->ef);
1387         if (ret) {
1388                 pr_err("Failed to create process VM object\n");
1389                 return ret;
1390         }
1391
1392         amdgpu_vm_set_task_info(pdd->vm);
1393
1394         ret = kfd_process_device_reserve_ib_mem(pdd);
1395         if (ret)
1396                 goto err_reserve_ib_mem;
1397         ret = kfd_process_device_init_cwsr_dgpu(pdd);
1398         if (ret)
1399                 goto err_init_cwsr;
1400
1401         pdd->drm_file = drm_file;
1402
1403         return 0;
1404
1405 err_init_cwsr:
1406 err_reserve_ib_mem:
1407         kfd_process_device_free_bos(pdd);
1408         pdd->vm = NULL;
1409
1410         return ret;
1411 }
1412
1413 /*
1414  * Direct the IOMMU to bind the process (specifically the pasid->mm)
1415  * to the device.
1416  * Unbinding occurs when the process dies or the device is removed.
1417  *
1418  * Assumes that the process lock is held.
1419  */
1420 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
1421                                                         struct kfd_process *p)
1422 {
1423         struct kfd_process_device *pdd;
1424         int err;
1425
1426         pdd = kfd_get_process_device_data(dev, p);
1427         if (!pdd) {
1428                 pr_err("Process device data doesn't exist\n");
1429                 return ERR_PTR(-ENOMEM);
1430         }
1431
1432         if (!pdd->vm)
1433                 return ERR_PTR(-ENODEV);
1434
1435         /*
1436          * Signal the runtime-PM system to auto-resume and to prevent
1437          * further runtime suspend from the time the device pdd is
1438          * created until the pdd is destroyed.
1439          */
1440         if (!pdd->runtime_inuse) {
1441                 err = pm_runtime_get_sync(dev->ddev->dev);
1442                 if (err < 0) {
1443                         pm_runtime_put_autosuspend(dev->ddev->dev);
1444                         return ERR_PTR(err);
1445                 }
1446         }
1447
1448         err = kfd_iommu_bind_process_to_device(pdd);
1449         if (err)
1450                 goto out;
1451
1452         /*
1453          * make sure that runtime_usage counter is incremented just once
1454          * per pdd
1455          */
1456         pdd->runtime_inuse = true;
1457
1458         return pdd;
1459
1460 out:
1461         /* balance runpm reference count and exit with error */
1462         if (!pdd->runtime_inuse) {
1463                 pm_runtime_mark_last_busy(dev->ddev->dev);
1464                 pm_runtime_put_autosuspend(dev->ddev->dev);
1465         }
1466
1467         return ERR_PTR(err);
1468 }
1469
1470 /* Create specific handle mapped to mem from process local memory idr
1471  * Assumes that the process lock is held.
1472  */
1473 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
1474                                         void *mem)
1475 {
1476         return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
1477 }
1478
1479 /* Translate specific handle from process local memory idr
1480  * Assumes that the process lock is held.
1481  */
1482 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
1483                                         int handle)
1484 {
1485         if (handle < 0)
1486                 return NULL;
1487
1488         return idr_find(&pdd->alloc_idr, handle);
1489 }
1490
1491 /* Remove specific handle from process local memory idr
1492  * Assumes that the process lock is held.
1493  */
1494 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
1495                                         int handle)
1496 {
1497         if (handle >= 0)
1498                 idr_remove(&pdd->alloc_idr, handle);
1499 }
1500
1501 /* This increments the process->ref counter. */
1502 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
1503 {
1504         struct kfd_process *p, *ret_p = NULL;
1505         unsigned int temp;
1506
1507         int idx = srcu_read_lock(&kfd_processes_srcu);
1508
1509         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1510                 if (p->pasid == pasid) {
1511                         kref_get(&p->ref);
1512                         ret_p = p;
1513                         break;
1514                 }
1515         }
1516
1517         srcu_read_unlock(&kfd_processes_srcu, idx);
1518
1519         return ret_p;
1520 }
1521
1522 /* This increments the process->ref counter. */
1523 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
1524 {
1525         struct kfd_process *p;
1526
1527         int idx = srcu_read_lock(&kfd_processes_srcu);
1528
1529         p = find_process_by_mm(mm);
1530         if (p)
1531                 kref_get(&p->ref);
1532
1533         srcu_read_unlock(&kfd_processes_srcu, idx);
1534
1535         return p;
1536 }
1537
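/*
 * Illustrative sketch (hypothetical caller): both lookups return the process
 * with an extra reference taken, so the caller is expected to drop it with
 * kfd_unref_process() when done, e.g.:
 *
 *	p = kfd_lookup_process_by_pasid(pasid);
 *	if (!p)
 *		return -ESRCH;
 *	...
 *	kfd_unref_process(p);
 */
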
1538 /* kfd_process_evict_queues - Evict all user queues of a process
1539  *
1540  * Eviction is reference-counted per process-device. This means multiple
1541  * evictions from different sources can be nested safely.
1542  */
1543 int kfd_process_evict_queues(struct kfd_process *p)
1544 {
1545         int r = 0;
1546         int i;
1547         unsigned int n_evicted = 0;
1548
1549         for (i = 0; i < p->n_pdds; i++) {
1550                 struct kfd_process_device *pdd = p->pdds[i];
1551
1552                 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
1553                                                             &pdd->qpd);
1554                 if (r) {
1555                         pr_err("Failed to evict process queues\n");
1556                         goto fail;
1557                 }
1558                 n_evicted++;
1559         }
1560
1561         return r;
1562
1563 fail:
1564         /* To keep state consistent, roll back partial eviction by
1565          * restoring queues
1566          */
1567         for (i = 0; i < p->n_pdds; i++) {
1568                 struct kfd_process_device *pdd = p->pdds[i];
1569
1570                 if (n_evicted == 0)
1571                         break;
1572                 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1573                                                               &pdd->qpd))
1574                         pr_err("Failed to restore queues\n");
1575
1576                 n_evicted--;
1577         }
1578
1579         return r;
1580 }
1581
1582 /* kfd_process_restore_queues - Restore all user queues of a process */
1583 int kfd_process_restore_queues(struct kfd_process *p)
1584 {
1585         int r, ret = 0;
1586         int i;
1587
1588         for (i = 0; i < p->n_pdds; i++) {
1589                 struct kfd_process_device *pdd = p->pdds[i];
1590
1591                 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1592                                                               &pdd->qpd);
1593                 if (r) {
1594                         pr_err("Failed to restore process queues\n");
1595                         if (!ret)
1596                                 ret = r;
1597                 }
1598         }
1599
1600         return ret;
1601 }
1602
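/*
 * Illustrative sketch (not taken from this file): because eviction is
 * reference-counted per process-device, independent triggers can nest as long
 * as every evict is paired with a matching restore, e.g.:
 *
 *	kfd_process_evict_queues(p);    (trigger A)
 *	kfd_process_evict_queues(p);    (trigger B, queues stay evicted)
 *	kfd_process_restore_queues(p);  (undoes B, still evicted)
 *	kfd_process_restore_queues(p);  (undoes A, queues run again)
 */
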
1603 static void evict_process_worker(struct work_struct *work)
1604 {
1605         int ret;
1606         struct kfd_process *p;
1607         struct delayed_work *dwork;
1608
1609         dwork = to_delayed_work(work);
1610
1611         /* Process termination destroys this work item, so for the
1612          * lifetime of this work item the kfd_process p remains valid.
1613          */
1614         p = container_of(dwork, struct kfd_process, eviction_work);
1615         WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
1616                   "Eviction fence mismatch\n");
1617
1618         /* A narrow window of overlap between the restore and evict work
1619          * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
1620          * unreserves the KFD BOs, the process can be evicted again while
1621          * the restore still has a few steps left to finish. So wait for
1622          * any previous restore work to complete first.
1623          */
1624         flush_delayed_work(&p->restore_work);
1625
1626         pr_debug("Started evicting pasid 0x%x\n", p->pasid);
1627         ret = kfd_process_evict_queues(p);
1628         if (!ret) {
1629                 dma_fence_signal(p->ef);
1630                 dma_fence_put(p->ef);
1631                 p->ef = NULL;
1632                 queue_delayed_work(kfd_restore_wq, &p->restore_work,
1633                                 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
1634
1635                 pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
1636         } else
1637                 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
1638 }
1639
1640 static void restore_process_worker(struct work_struct *work)
1641 {
1642         struct delayed_work *dwork;
1643         struct kfd_process *p;
1644         int ret = 0;
1645
1646         dwork = to_delayed_work(work);
1647
1648         /* Process termination destroys this work item, so for the
1649          * lifetime of this work item the kfd_process p remains valid.
1650          */
1651         p = container_of(dwork, struct kfd_process, restore_work);
1652         pr_debug("Started restoring pasid 0x%x\n", p->pasid);
1653
1654         /* Set last_restore_timestamp before the restore has actually
1655          * succeeded. Otherwise it would have to be set by KGD
1656          * (restore_process_bos) before the KFD BOs are unreserved; if not,
1657          * the process could be evicted again before the timestamp is set.
1658          * If the restore fails, the timestamp is simply set again on the
1659          * next attempt. This means the minimum GPU quantum is
1660          * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
1661          * following two functions.
1662          */
1663
1664         p->last_restore_timestamp = get_jiffies_64();
1665         ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
1666                                                      &p->ef);
1667         if (ret) {
1668                 pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
1669                          p->pasid, PROCESS_BACK_OFF_TIME_MS);
1670                 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
1671                                 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
1672                 WARN(!ret, "reschedule restore work failed\n");
1673                 return;
1674         }
1675
1676         ret = kfd_process_restore_queues(p);
1677         if (!ret)
1678                 pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
1679         else
1680                 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
1681 }
1682
1683 void kfd_suspend_all_processes(void)
1684 {
1685         struct kfd_process *p;
1686         unsigned int temp;
1687         int idx = srcu_read_lock(&kfd_processes_srcu);
1688
1689         WARN(debug_evictions, "Evicting all processes");
1690         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1691                 cancel_delayed_work_sync(&p->eviction_work);
1692                 cancel_delayed_work_sync(&p->restore_work);
1693
1694                 if (kfd_process_evict_queues(p))
1695                         pr_err("Failed to suspend process 0x%x\n", p->pasid);
1696                 dma_fence_signal(p->ef);
1697                 dma_fence_put(p->ef);
1698                 p->ef = NULL;
1699         }
1700         srcu_read_unlock(&kfd_processes_srcu, idx);
1701 }
1702
1703 int kfd_resume_all_processes(void)
1704 {
1705         struct kfd_process *p;
1706         unsigned int temp;
1707         int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
1708
1709         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1710                 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
1711                         pr_err("Restore process %d failed during resume\n",
1712                                p->pasid);
1713                         ret = -EFAULT;
1714                 }
1715         }
1716         srcu_read_unlock(&kfd_processes_srcu, idx);
1717         return ret;
1718 }
1719
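/*
 * Illustrative sketch (hypothetical reset path): a caller would bracket a
 * device reset with these two calls so every process is evicted before the
 * hardware goes down and queued for restore afterwards:
 *
 *	kfd_suspend_all_processes();
 *	... reset and reinitialize the hardware ...
 *	if (kfd_resume_all_processes())
 *		pr_err("Failed to resume KFD processes\n");
 */
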
1720 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
1721                           struct vm_area_struct *vma)
1722 {
1723         struct kfd_process_device *pdd;
1724         struct qcm_process_device *qpd;
1725
1726         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
1727                 pr_err("Incorrect CWSR mapping size.\n");
1728                 return -EINVAL;
1729         }
1730
1731         pdd = kfd_get_process_device_data(dev, process);
1732         if (!pdd)
1733                 return -EINVAL;
1734         qpd = &pdd->qpd;
1735
1736         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1737                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
1738         if (!qpd->cwsr_kaddr) {
1739                 pr_err("Error allocating per process CWSR buffer.\n");
1740                 return -ENOMEM;
1741         }
1742
1743         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
1744                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
1745         /* Mapping pages to user process */
1746         return remap_pfn_range(vma, vma->vm_start,
1747                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
1748                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
1749 }
1750
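/*
 * Illustrative sketch (userspace side; names, flags and offset are
 * assumptions): the runtime maps the CWSR trap-handler area with an mmap
 * whose length must be exactly KFD_CWSR_TBA_TMA_SIZE, e.g.:
 *
 *	addr = mmap(NULL, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, kfd_fd, cwsr_mmap_offset);
 */
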
1751 void kfd_flush_tlb(struct kfd_process_device *pdd)
1752 {
1753         struct kfd_dev *dev = pdd->dev;
1754
1755         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1756                 /* Nothing to flush until a VMID is assigned, which
1757                  * only happens when the first queue is created.
1758                  */
1759                 if (pdd->qpd.vmid)
1760                         amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
1761                                                         pdd->qpd.vmid);
1762         } else {
1763                 amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
1764                                                 pdd->process->pasid);
1765         }
1766 }
1767
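/*
 * Illustrative sketch (hypothetical caller and helper): the TLB flush is
 * typically issued after the process's GPU page-table mappings on this
 * device have changed, e.g.:
 *
 *	err = update_gpuvm_mappings(pdd);   (hypothetical helper)
 *	if (!err)
 *		kfd_flush_tlb(pdd);
 */
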
1768 #if defined(CONFIG_DEBUG_FS)
1769
1770 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
1771 {
1772         struct kfd_process *p;
1773         unsigned int temp;
1774         int r = 0;
1775
1776         int idx = srcu_read_lock(&kfd_processes_srcu);
1777
1778         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1779                 seq_printf(m, "Process %d PASID 0x%x:\n",
1780                            p->lead_thread->tgid, p->pasid);
1781
1782                 mutex_lock(&p->mutex);
1783                 r = pqm_debugfs_mqds(m, &p->pqm);
1784                 mutex_unlock(&p->mutex);
1785
1786                 if (r)
1787                         break;
1788         }
1789
1790         srcu_read_unlock(&kfd_processes_srcu, idx);
1791
1792         return r;
1793 }
1794
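/*
 * Illustrative note (path assumed from the usual KFD debugfs layout): with
 * CONFIG_DEBUG_FS enabled this dump can be read from userspace, e.g.:
 *
 *	cat /sys/kernel/debug/kfd/mqds
 */
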
1795 #endif
1796