[linux.git] drivers/gpu/drm/amd/amdkfd/kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/mmu_context.h>
29 #include <linux/slab.h>
30 #include <linux/amd-iommu.h>
31 #include <linux/notifier.h>
32 #include <linux/compat.h>
33 #include <linux/mman.h>
34 #include <linux/file.h>
35 #include <linux/pm_runtime.h>
36 #include "amdgpu_amdkfd.h"
37 #include "amdgpu.h"
38
39 struct mm_struct;
40
41 #include "kfd_priv.h"
42 #include "kfd_device_queue_manager.h"
43 #include "kfd_dbgmgr.h"
44 #include "kfd_iommu.h"
45
46 /*
47  * List of struct kfd_process (field kfd_processes).
48  * Unique/indexed by mm_struct*
49  */
50 DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
51 static DEFINE_MUTEX(kfd_processes_mutex);
52
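/* SRCU protects lockless traversal of kfd_processes_table (see
 * find_process_by_mm()); updates are serialized by kfd_processes_mutex.
 */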
53 DEFINE_SRCU(kfd_processes_srcu);
54
55 /* For process termination handling */
56 static struct workqueue_struct *kfd_process_wq;
57
58 /* Ordered, single-threaded workqueue for restoring evicted
59  * processes. Restoring multiple processes concurrently under memory
60  * pressure can lead to processes blocking each other from validating
61  * their BOs and result in a live-lock situation where processes
62  * remain evicted indefinitely.
63  */
64 static struct workqueue_struct *kfd_restore_wq;
65
66 static struct kfd_process *find_process(const struct task_struct *thread);
67 static void kfd_process_ref_release(struct kref *ref);
68 static struct kfd_process *create_process(const struct task_struct *thread);
69 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
70
71 static void evict_process_worker(struct work_struct *work);
72 static void restore_process_worker(struct work_struct *work);
73
74 struct kfd_procfs_tree {
75         struct kobject *kobj;
76 };
77
78 static struct kfd_procfs_tree procfs;
79
80 /*
81  * Structure for SDMA activity tracking
82  */
83 struct kfd_sdma_activity_handler_workarea {
84         struct work_struct sdma_activity_work;
85         struct kfd_process_device *pdd;
86         uint64_t sdma_activity_counter;
87 };
88
89 struct temp_sdma_queue_list {
90         uint64_t __user *rptr;
91         uint64_t sdma_val;
92         unsigned int queue_id;
93         struct list_head list;
94 };
95
96 static void kfd_sdma_activity_worker(struct work_struct *work)
97 {
98         struct kfd_sdma_activity_handler_workarea *workarea;
99         struct kfd_process_device *pdd;
100         uint64_t val;
101         struct mm_struct *mm;
102         struct queue *q;
103         struct qcm_process_device *qpd;
104         struct device_queue_manager *dqm;
105         int ret = 0;
106         struct temp_sdma_queue_list sdma_q_list;
107         struct temp_sdma_queue_list *sdma_q, *next;
108
109         workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
110                                 sdma_activity_work);
111         if (!workarea)
112                 return;
113
114         pdd = workarea->pdd;
115         if (!pdd)
116                 return;
117         dqm = pdd->dev->dqm;
118         qpd = &pdd->qpd;
119         if (!dqm || !qpd)
120                 return;
121         /*
122          * Total SDMA activity is current SDMA activity + past SDMA activity
123          * Past SDMA count is stored in pdd.
124          * To get the current activity counters for all active SDMA queues,
125          * we loop over all SDMA queues and get their counts from user-space.
126          *
127          * We cannot call get_user() with dqm_lock held as it can cause
128          * a circular lock dependency situation. To read the SDMA stats,
129          * we need to do the following:
130          *
131          * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
132          *    with dqm_lock/dqm_unlock().
133          * 2. Call get_user() for each node in temporary list without dqm_lock.
134          *    Save the SDMA count for each node and also add the count to the total
135          *    SDMA count.
136          *    It is possible that, during this step, a few SDMA queue nodes got deleted
137          *    from the qpd->queues_list.
138          * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
139          *    If any node got deleted, its SDMA count would be captured in the sdma
140          *    past activity counter. So subtract the SDMA counter stored in step 2
141          *    for this node from the total SDMA count.
142          */
143         INIT_LIST_HEAD(&sdma_q_list.list);
144
145         /*
146          * Create the temp list of all SDMA queues
147          */
148         dqm_lock(dqm);
149
150         list_for_each_entry(q, &qpd->queues_list, list) {
151                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
152                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
153                         continue;
154
155                 sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
156                 if (!sdma_q) {
157                         dqm_unlock(dqm);
158                         goto cleanup;
159                 }
160
161                 INIT_LIST_HEAD(&sdma_q->list);
162                 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
163                 sdma_q->queue_id = q->properties.queue_id;
164                 list_add_tail(&sdma_q->list, &sdma_q_list.list);
165         }
166
167         /*
168          * If the temp list is empty, then no SDMA queue nodes were found in
169          * qpd->queues_list. Return the past activity count as the total SDMA
170          * count.
171          */
172         if (list_empty(&sdma_q_list.list)) {
173                 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
174                 dqm_unlock(dqm);
175                 return;
176         }
177
178         dqm_unlock(dqm);
179
180         /*
181          * Get the usage count for each SDMA queue in temp_list.
182          */
183         mm = get_task_mm(pdd->process->lead_thread);
184         if (!mm)
185                 goto cleanup;
186
187         kthread_use_mm(mm);
188
189         list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
190                 val = 0;
191                 ret = read_sdma_queue_counter(sdma_q->rptr, &val);
192                 if (ret) {
193                         pr_debug("Failed to read SDMA queue active counter for queue id: %d",
194                                  sdma_q->queue_id);
195                 } else {
196                         sdma_q->sdma_val = val;
197                         workarea->sdma_activity_counter += val;
198                 }
199         }
200
201         kthread_unuse_mm(mm);
202         mmput(mm);
203
204         /*
205          * Do a second iteration over qpd->queues_list to check if any SDMA
206          * nodes got deleted while fetching SDMA counter.
207          */
208         dqm_lock(dqm);
209
210         workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
211
212         list_for_each_entry(q, &qpd->queues_list, list) {
213                 if (list_empty(&sdma_q_list.list))
214                         break;
215
216                 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
217                     (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
218                         continue;
219
220                 list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
221                         if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
222                              (sdma_q->queue_id == q->properties.queue_id)) {
223                                 list_del(&sdma_q->list);
224                                 kfree(sdma_q);
225                                 break;
226                         }
227                 }
228         }
229
230         dqm_unlock(dqm);
231
232         /*
233          * If temp list is not empty, it implies some queues got deleted
234          * from qpd->queues_list during SDMA usage read. Subtract the SDMA
235          * count for each node from the total SDMA count.
236          */
237         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
238                 workarea->sdma_activity_counter -= sdma_q->sdma_val;
239                 list_del(&sdma_q->list);
240                 kfree(sdma_q);
241         }
242
243         return;
244
245 cleanup:
246         list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
247                 list_del(&sdma_q->list);
248                 kfree(sdma_q);
249         }
250 }
251
252 /**
253  * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
254  * by the current process. Translates the acquired wave count into the number
255  * of compute units that are occupied.
256  *
257  * @attr: Handle of the attribute that allows reporting of wave count. The
258  * attribute handle encapsulates the GPU device it is associated with, thereby
259  * allowing collection of waves in flight, etc.
260  *
261  * @buffer: Handle of user provided buffer updated with wave count
262  *
263  * Return: Number of bytes written to user buffer or an error value
264  */
265 static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
266 {
267         int cu_cnt;
268         int wave_cnt;
269         int max_waves_per_cu;
270         struct kfd_dev *dev = NULL;
271         struct kfd_process *proc = NULL;
272         struct kfd_process_device *pdd = NULL;
273
274         pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
275         dev = pdd->dev;
276         if (dev->kfd2kgd->get_cu_occupancy == NULL)
277                 return -EINVAL;
278
279         cu_cnt = 0;
280         proc = pdd->process;
281         if (pdd->qpd.queue_count == 0) {
282                 pr_debug("Gpu-Id: %d has no active queues for process %d\n",
283                          dev->id, proc->pasid);
284                 return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
285         }
286
287         /* Collect wave count from the device if it supports the feature */
288         wave_cnt = 0;
289         max_waves_per_cu = 0;
290         dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
291                         &max_waves_per_cu);
292
293         /* Translate wave count to number of compute units */
294         cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
295         return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
296 }
297
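/* sysfs show handler for the per-process attributes: "pasid" reports the
 * process PASID, "vram_<gpuid>" the per-device VRAM usage, and "sdma_<gpuid>"
 * schedules kfd_sdma_activity_worker() and waits for it before reporting the
 * accumulated SDMA activity.
 */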
298 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
299                                char *buffer)
300 {
301         if (strcmp(attr->name, "pasid") == 0) {
302                 struct kfd_process *p = container_of(attr, struct kfd_process,
303                                                      attr_pasid);
304
305                 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
306         } else if (strncmp(attr->name, "vram_", 5) == 0) {
307                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
308                                                               attr_vram);
309                 return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
310         } else if (strncmp(attr->name, "sdma_", 5) == 0) {
311                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
312                                                               attr_sdma);
313                 struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
314
315                 INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
316                                         kfd_sdma_activity_worker);
317
318                 sdma_activity_work_handler.pdd = pdd;
319                 sdma_activity_work_handler.sdma_activity_counter = 0;
320
321                 schedule_work(&sdma_activity_work_handler.sdma_activity_work);
322
323                 flush_work(&sdma_activity_work_handler.sdma_activity_work);
324
325                 return snprintf(buffer, PAGE_SIZE, "%llu\n",
326                                 (sdma_activity_work_handler.sdma_activity_counter)/
327                                  SDMA_ACTIVITY_DIVISOR);
328         } else {
329                 pr_err("Invalid attribute");
330                 return -EINVAL;
331         }
332
333         return 0;
334 }
335
336 static void kfd_procfs_kobj_release(struct kobject *kobj)
337 {
338         kfree(kobj);
339 }
340
341 static const struct sysfs_ops kfd_procfs_ops = {
342         .show = kfd_procfs_show,
343 };
344
345 static struct kobj_type procfs_type = {
346         .release = kfd_procfs_kobj_release,
347         .sysfs_ops = &kfd_procfs_ops,
348 };
349
350 void kfd_procfs_init(void)
351 {
352         int ret = 0;
353
354         procfs.kobj = kfd_alloc_struct(procfs.kobj);
355         if (!procfs.kobj)
356                 return;
357
358         ret = kobject_init_and_add(procfs.kobj, &procfs_type,
359                                    &kfd_device->kobj, "proc");
360         if (ret) {
361                 pr_warn("Could not create procfs proc folder");
362                 /* If we fail to create the procfs, clean up */
363                 kfd_procfs_shutdown();
364         }
365 }
366
367 void kfd_procfs_shutdown(void)
368 {
369         if (procfs.kobj) {
370                 kobject_del(procfs.kobj);
371                 kobject_put(procfs.kobj);
372                 procfs.kobj = NULL;
373         }
374 }
375
376 static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
377                                      struct attribute *attr, char *buffer)
378 {
379         struct queue *q = container_of(kobj, struct queue, kobj);
380
381         if (!strcmp(attr->name, "size"))
382                 return snprintf(buffer, PAGE_SIZE, "%llu",
383                                 q->properties.queue_size);
384         else if (!strcmp(attr->name, "type"))
385                 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
386         else if (!strcmp(attr->name, "gpuid"))
387                 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
388         else
389                 pr_err("Invalid attribute");
390
391         return 0;
392 }
393
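/* sysfs show handler for the per-device stats_<gpuid> attributes:
 * "evicted_ms" reports the accumulated eviction time in milliseconds and
 * "cu_occupancy" the number of compute units currently occupied.
 */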
394 static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
395                                      struct attribute *attr, char *buffer)
396 {
397         if (strcmp(attr->name, "evicted_ms") == 0) {
398                 struct kfd_process_device *pdd = container_of(attr,
399                                 struct kfd_process_device,
400                                 attr_evict);
401                 uint64_t evict_jiffies;
402
403                 evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
404
405                 return snprintf(buffer,
406                                 PAGE_SIZE,
407                                 "%llu\n",
408                                 jiffies64_to_msecs(evict_jiffies));
409
410         /* Sysfs handle that gets CU occupancy is per device */
411         } else if (strcmp(attr->name, "cu_occupancy") == 0) {
412                 return kfd_get_cu_occupancy(attr, buffer);
413         } else {
414                 pr_err("Invalid attribute");
415         }
416
417         return 0;
418 }
419
420 static struct attribute attr_queue_size = {
421         .name = "size",
422         .mode = KFD_SYSFS_FILE_MODE
423 };
424
425 static struct attribute attr_queue_type = {
426         .name = "type",
427         .mode = KFD_SYSFS_FILE_MODE
428 };
429
430 static struct attribute attr_queue_gpuid = {
431         .name = "gpuid",
432         .mode = KFD_SYSFS_FILE_MODE
433 };
434
435 static struct attribute *procfs_queue_attrs[] = {
436         &attr_queue_size,
437         &attr_queue_type,
438         &attr_queue_gpuid,
439         NULL
440 };
441
442 static const struct sysfs_ops procfs_queue_ops = {
443         .show = kfd_procfs_queue_show,
444 };
445
446 static struct kobj_type procfs_queue_type = {
447         .sysfs_ops = &procfs_queue_ops,
448         .default_attrs = procfs_queue_attrs,
449 };
450
451 static const struct sysfs_ops procfs_stats_ops = {
452         .show = kfd_procfs_stats_show,
453 };
454
455 static struct attribute *procfs_stats_attrs[] = {
456         NULL
457 };
458
459 static struct kobj_type procfs_stats_type = {
460         .sysfs_ops = &procfs_stats_ops,
461         .default_attrs = procfs_stats_attrs,
462 };
463
464 int kfd_procfs_add_queue(struct queue *q)
465 {
466         struct kfd_process *proc;
467         int ret;
468
469         if (!q || !q->process)
470                 return -EINVAL;
471         proc = q->process;
472
473         /* Create proc/<pid>/queues/<queue id> folder */
474         if (!proc->kobj_queues)
475                 return -EFAULT;
476         ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
477                         proc->kobj_queues, "%u", q->properties.queue_id);
478         if (ret < 0) {
479                 pr_warn("Creating proc/<pid>/queues/%u failed",
480                         q->properties.queue_id);
481                 kobject_put(&q->kobj);
482                 return ret;
483         }
484
485         return 0;
486 }
487
488 static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
489                                  char *name)
490 {
491         int ret = 0;
492
493         if (!p || !attr || !name)
494                 return -EINVAL;
495
496         attr->name = name;
497         attr->mode = KFD_SYSFS_FILE_MODE;
498         sysfs_attr_init(attr);
499
500         ret = sysfs_create_file(p->kobj, attr);
501
502         return ret;
503 }
504
505 static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
506 {
507         int ret = 0;
508         int i;
509         char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
510
511         if (!p)
512                 return -EINVAL;
513
514         if (!p->kobj)
515                 return -EFAULT;
516
517         /*
518          * Create sysfs files for each GPU:
519          * - proc/<pid>/stats_<gpuid>/
520          * - proc/<pid>/stats_<gpuid>/evicted_ms
521          * - proc/<pid>/stats_<gpuid>/cu_occupancy
522          */
523         for (i = 0; i < p->n_pdds; i++) {
524                 struct kfd_process_device *pdd = p->pdds[i];
525                 struct kobject *kobj_stats;
526
527                 snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
528                                 "stats_%u", pdd->dev->id);
529                 kobj_stats = kfd_alloc_struct(kobj_stats);
530                 if (!kobj_stats)
531                         return -ENOMEM;
532
533                 ret = kobject_init_and_add(kobj_stats,
534                                                 &procfs_stats_type,
535                                                 p->kobj,
536                                                 stats_dir_filename);
537
538                 if (ret) {
539                         pr_warn("Creating KFD proc/stats_%s folder failed",
540                                         stats_dir_filename);
541                         kobject_put(kobj_stats);
542                         goto err;
543                 }
544
545                 pdd->kobj_stats = kobj_stats;
546                 pdd->attr_evict.name = "evicted_ms";
547                 pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
548                 sysfs_attr_init(&pdd->attr_evict);
549                 ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
550                 if (ret)
551                         pr_warn("Creating eviction stats for gpuid %d failed",
552                                         (int)pdd->dev->id);
553
554                 /* Add sysfs file to report compute unit occupancy */
555                 if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
556                         pdd->attr_cu_occupancy.name = "cu_occupancy";
557                         pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
558                         sysfs_attr_init(&pdd->attr_cu_occupancy);
559                         ret = sysfs_create_file(kobj_stats,
560                                                 &pdd->attr_cu_occupancy);
561                         if (ret)
562                                 pr_warn("Creating %s failed for gpuid: %d",
563                                         pdd->attr_cu_occupancy.name,
564                                         (int)pdd->dev->id);
565                 }
566         }
567 err:
568         return ret;
569 }
570
571
572 static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
573 {
574         int ret = 0;
575         int i;
576
577         if (!p)
578                 return -EINVAL;
579
580         if (!p->kobj)
581                 return -EFAULT;
582
583         /*
584          * Create sysfs files for each GPU:
585          * - proc/<pid>/vram_<gpuid>
586          * - proc/<pid>/sdma_<gpuid>
587          */
588         for (i = 0; i < p->n_pdds; i++) {
589                 struct kfd_process_device *pdd = p->pdds[i];
590
591                 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
592                          pdd->dev->id);
593                 ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
594                 if (ret)
595                         pr_warn("Creating vram usage for gpu id %d failed",
596                                 (int)pdd->dev->id);
597
598                 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
599                          pdd->dev->id);
600                 ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
601                 if (ret)
602                         pr_warn("Creating sdma usage for gpu id %d failed",
603                                 (int)pdd->dev->id);
604         }
605
606         return ret;
607 }
608
609 void kfd_procfs_del_queue(struct queue *q)
610 {
611         if (!q)
612                 return;
613
614         kobject_del(&q->kobj);
615         kobject_put(&q->kobj);
616 }
617
618 int kfd_process_create_wq(void)
619 {
620         if (!kfd_process_wq)
621                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
622         if (!kfd_restore_wq)
623                 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
624
625         if (!kfd_process_wq || !kfd_restore_wq) {
626                 kfd_process_destroy_wq();
627                 return -ENOMEM;
628         }
629
630         return 0;
631 }
632
633 void kfd_process_destroy_wq(void)
634 {
635         if (kfd_process_wq) {
636                 destroy_workqueue(kfd_process_wq);
637                 kfd_process_wq = NULL;
638         }
639         if (kfd_restore_wq) {
640                 destroy_workqueue(kfd_restore_wq);
641                 kfd_restore_wq = NULL;
642         }
643 }
644
645 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
646                         struct kfd_process_device *pdd)
647 {
648         struct kfd_dev *dev = pdd->dev;
649
650         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
651         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
652 }
653
654 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
655  *      This function should only be called right after the process
656  *      is created, while kfd_processes_mutex is still being held,
657  *      to avoid concurrency. Because of that exclusiveness, we do
658  *      not need to take p->mutex.
659  */
660 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
661                                    uint64_t gpu_va, uint32_t size,
662                                    uint32_t flags, void **kptr)
663 {
664         struct kfd_dev *kdev = pdd->dev;
665         struct kgd_mem *mem = NULL;
666         int handle;
667         int err;
668
669         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
670                                                  pdd->vm, &mem, NULL, flags);
671         if (err)
672                 goto err_alloc_mem;
673
674         err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
675         if (err)
676                 goto err_map_mem;
677
678         err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
679         if (err) {
680                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
681                 goto sync_memory_failed;
682         }
683
684         /* Create an obj handle so kfd_process_device_remove_obj_handle
685          * will take care of the bo removal when the process finishes.
686          * We do not need to take p->mutex, because the process is just
687          * created and the ioctls have not had the chance to run.
688          */
689         handle = kfd_process_device_create_obj_handle(pdd, mem);
690
691         if (handle < 0) {
692                 err = handle;
693                 goto free_gpuvm;
694         }
695
696         if (kptr) {
697                 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
698                                 (struct kgd_mem *)mem, kptr, NULL);
699                 if (err) {
700                         pr_debug("Map GTT BO to kernel failed\n");
701                         goto free_obj_handle;
702                 }
703         }
704
705         return err;
706
707 free_obj_handle:
708         kfd_process_device_remove_obj_handle(pdd, handle);
709 free_gpuvm:
710 sync_memory_failed:
711         kfd_process_free_gpuvm(mem, pdd);
712         return err;
713
714 err_map_mem:
715         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
716 err_alloc_mem:
717         *kptr = NULL;
718         return err;
719 }
720
721 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
722  *      process for IB usage. The memory reserved is for KFD to submit
723  *      IBs to AMDGPU from the kernel.  If the memory is reserved
724  *      successfully, ib_kaddr will have the CPU/kernel
725  *      address. Check ib_kaddr before accessing the memory.
726  */
727 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
728 {
729         struct qcm_process_device *qpd = &pdd->qpd;
730         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
731                         KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
732                         KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
733                         KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
734         void *kaddr;
735         int ret;
736
737         if (qpd->ib_kaddr || !qpd->ib_base)
738                 return 0;
739
740         /* ib_base is only set for dGPU */
741         ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
742                                       &kaddr);
743         if (ret)
744                 return ret;
745
746         qpd->ib_kaddr = kaddr;
747
748         return 0;
749 }
750
751 struct kfd_process *kfd_create_process(struct file *filep)
752 {
753         struct kfd_process *process;
754         struct task_struct *thread = current;
755         int ret;
756
757         if (!thread->mm)
758                 return ERR_PTR(-EINVAL);
759
760         /* Only the pthreads threading model is supported. */
761         if (thread->group_leader->mm != thread->mm)
762                 return ERR_PTR(-EINVAL);
763
764         /*
765          * Take the kfd_processes_mutex before starting process creation
766          * so there is no window where two threads of the same process
767          * could create two kfd_process structures.
768          */
769         mutex_lock(&kfd_processes_mutex);
770
771         /* A prior open of /dev/kfd could have already created the process. */
772         process = find_process(thread);
773         if (process) {
774                 pr_debug("Process already found\n");
775         } else {
776                 process = create_process(thread);
777                 if (IS_ERR(process))
778                         goto out;
779
780                 ret = kfd_process_init_cwsr_apu(process, filep);
781                 if (ret)
782                         goto out_destroy;
783
784                 if (!procfs.kobj)
785                         goto out;
786
787                 process->kobj = kfd_alloc_struct(process->kobj);
788                 if (!process->kobj) {
789                         pr_warn("Creating procfs kobject failed");
790                         goto out;
791                 }
792                 ret = kobject_init_and_add(process->kobj, &procfs_type,
793                                            procfs.kobj, "%d",
794                                            (int)process->lead_thread->pid);
795                 if (ret) {
796                         pr_warn("Creating procfs pid directory failed");
797                         kobject_put(process->kobj);
798                         goto out;
799                 }
800
801                 process->attr_pasid.name = "pasid";
802                 process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
803                 sysfs_attr_init(&process->attr_pasid);
804                 ret = sysfs_create_file(process->kobj, &process->attr_pasid);
805                 if (ret)
806                         pr_warn("Creating pasid for pid %d failed",
807                                         (int)process->lead_thread->pid);
808
809                 process->kobj_queues = kobject_create_and_add("queues",
810                                                         process->kobj);
811                 if (!process->kobj_queues)
812                         pr_warn("Creating KFD proc/queues folder failed");
813
814                 ret = kfd_procfs_add_sysfs_stats(process);
815                 if (ret)
816                         pr_warn("Creating sysfs stats dir for pid %d failed",
817                                 (int)process->lead_thread->pid);
818
819                 ret = kfd_procfs_add_sysfs_files(process);
820                 if (ret)
821                         pr_warn("Creating sysfs usage file for pid %d failed",
822                                 (int)process->lead_thread->pid);
823         }
824 out:
825         if (!IS_ERR(process))
826                 kref_get(&process->ref);
827         mutex_unlock(&kfd_processes_mutex);
828
829         return process;
830
831 out_destroy:
832         hash_del_rcu(&process->kfd_processes);
833         mutex_unlock(&kfd_processes_mutex);
834         synchronize_srcu(&kfd_processes_srcu);
835         /* kfd_process_free_notifier will trigger the cleanup */
836         mmu_notifier_put(&process->mmu_notifier);
837         return ERR_PTR(ret);
838 }
839
840 struct kfd_process *kfd_get_process(const struct task_struct *thread)
841 {
842         struct kfd_process *process;
843
844         if (!thread->mm)
845                 return ERR_PTR(-EINVAL);
846
847         /* Only the pthreads threading model is supported. */
848         if (thread->group_leader->mm != thread->mm)
849                 return ERR_PTR(-EINVAL);
850
851         process = find_process(thread);
852         if (!process)
853                 return ERR_PTR(-EINVAL);
854
855         return process;
856 }
857
858 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
859 {
860         struct kfd_process *process;
861
862         hash_for_each_possible_rcu(kfd_processes_table, process,
863                                         kfd_processes, (uintptr_t)mm)
864                 if (process->mm == mm)
865                         return process;
866
867         return NULL;
868 }
869
870 static struct kfd_process *find_process(const struct task_struct *thread)
871 {
872         struct kfd_process *p;
873         int idx;
874
875         idx = srcu_read_lock(&kfd_processes_srcu);
876         p = find_process_by_mm(thread->mm);
877         srcu_read_unlock(&kfd_processes_srcu, idx);
878
879         return p;
880 }
881
882 void kfd_unref_process(struct kfd_process *p)
883 {
884         kref_put(&p->ref, kfd_process_ref_release);
885 }
886
887
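/* Free every BO tracked in the pdd's allocation idr: unmap it from all
 * device VMs it is mapped to, free it, and drop its handle.
 */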
888 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
889 {
890         struct kfd_process *p = pdd->process;
891         void *mem;
892         int id;
893         int i;
894
895         /*
896          * Remove all handles from the idr and release the corresponding
897          * local memory objects
898          */
899         idr_for_each_entry(&pdd->alloc_idr, mem, id) {
900
901                 for (i = 0; i < p->n_pdds; i++) {
902                         struct kfd_process_device *peer_pdd = p->pdds[i];
903
904                         if (!peer_pdd->vm)
905                                 continue;
906                         amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
907                                 peer_pdd->dev->kgd, mem, peer_pdd->vm);
908                 }
909
910                 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
911                 kfd_process_device_remove_obj_handle(pdd, id);
912         }
913 }
914
915 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
916 {
917         int i;
918
919         for (i = 0; i < p->n_pdds; i++)
920                 kfd_process_device_free_bos(p->pdds[i]);
921 }
922
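/* Release all per-device process data: the DRM-acquired VM, CWSR pages
 * allocated for APUs, the doorbell bitmap and doorbells, the allocation idr,
 * and the runtime-PM reference taken when the device was bound.
 */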
923 static void kfd_process_destroy_pdds(struct kfd_process *p)
924 {
925         int i;
926
927         for (i = 0; i < p->n_pdds; i++) {
928                 struct kfd_process_device *pdd = p->pdds[i];
929
930                 pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
931                                 pdd->dev->id, p->pasid);
932
933                 if (pdd->drm_file) {
934                         amdgpu_amdkfd_gpuvm_release_process_vm(
935                                         pdd->dev->kgd, pdd->vm);
936                         fput(pdd->drm_file);
937                 }
938
939                 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
940                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
941                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
942
943                 kfree(pdd->qpd.doorbell_bitmap);
944                 idr_destroy(&pdd->alloc_idr);
945
946                 kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
947
948                 /*
949                  * before destroying pdd, make sure to report availability
950                  * for auto suspend
951                  */
952                 if (pdd->runtime_inuse) {
953                         pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
954                         pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
955                         pdd->runtime_inuse = false;
956                 }
957
958                 kfree(pdd);
959                 p->pdds[i] = NULL;
960         }
961         p->n_pdds = 0;
962 }
963
964 /* No process locking is needed in this function, because the process
965  * is not findable any more. We must assume that no other thread is
966  * using it any more, otherwise we couldn't safely free the process
967  * structure in the end.
968  */
969 static void kfd_process_wq_release(struct work_struct *work)
970 {
971         struct kfd_process *p = container_of(work, struct kfd_process,
972                                              release_work);
973         int i;
974
975         /* Remove the procfs files */
976         if (p->kobj) {
977                 sysfs_remove_file(p->kobj, &p->attr_pasid);
978                 kobject_del(p->kobj_queues);
979                 kobject_put(p->kobj_queues);
980                 p->kobj_queues = NULL;
981
982                 for (i = 0; i < p->n_pdds; i++) {
983                         struct kfd_process_device *pdd = p->pdds[i];
984
985                         sysfs_remove_file(p->kobj, &pdd->attr_vram);
986                         sysfs_remove_file(p->kobj, &pdd->attr_sdma);
987                         sysfs_remove_file(p->kobj, &pdd->attr_evict);
988                         if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
989                                 sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
990                         kobject_del(pdd->kobj_stats);
991                         kobject_put(pdd->kobj_stats);
992                         pdd->kobj_stats = NULL;
993                 }
994
995                 kobject_del(p->kobj);
996                 kobject_put(p->kobj);
997                 p->kobj = NULL;
998         }
999
1000         kfd_iommu_unbind_process(p);
1001
1002         kfd_process_free_outstanding_kfd_bos(p);
1003
1004         kfd_process_destroy_pdds(p);
1005         dma_fence_put(p->ef);
1006
1007         kfd_event_free_process(p);
1008
1009         kfd_pasid_free(p->pasid);
1010         mutex_destroy(&p->mutex);
1011
1012         put_task_struct(p->lead_thread);
1013
1014         kfree(p);
1015 }
1016
1017 static void kfd_process_ref_release(struct kref *ref)
1018 {
1019         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
1020
1021         INIT_WORK(&p->release_work, kfd_process_wq_release);
1022         queue_work(kfd_process_wq, &p->release_work);
1023 }
1024
1025 static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
1026 {
1027         int idx = srcu_read_lock(&kfd_processes_srcu);
1028         struct kfd_process *p = find_process_by_mm(mm);
1029
1030         srcu_read_unlock(&kfd_processes_srcu, idx);
1031
1032         return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
1033 }
1034
1035 static void kfd_process_free_notifier(struct mmu_notifier *mn)
1036 {
1037         kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
1038 }
1039
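/* MMU notifier release callback, invoked when the process address space is
 * torn down: unhash the process, cancel eviction/restore work, destroy its
 * queues, signal the eviction fence, and drop the notifier reference.
 */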
1040 static void kfd_process_notifier_release(struct mmu_notifier *mn,
1041                                         struct mm_struct *mm)
1042 {
1043         struct kfd_process *p;
1044         int i;
1045
1046         /*
1047          * The kfd_process structure cannot be freed because the
1048          * mmu_notifier SRCU is read-locked
1049          */
1050         p = container_of(mn, struct kfd_process, mmu_notifier);
1051         if (WARN_ON(p->mm != mm))
1052                 return;
1053
1054         mutex_lock(&kfd_processes_mutex);
1055         hash_del_rcu(&p->kfd_processes);
1056         mutex_unlock(&kfd_processes_mutex);
1057         synchronize_srcu(&kfd_processes_srcu);
1058
1059         cancel_delayed_work_sync(&p->eviction_work);
1060         cancel_delayed_work_sync(&p->restore_work);
1061
1062         mutex_lock(&p->mutex);
1063
1064         /* Iterate over all process device data structures; if a
1065          * pdd is in debug mode, force unregistration first so that
1066          * the queues can then be destroyed.
1067          */
1068         for (i = 0; i < p->n_pdds; i++) {
1069                 struct kfd_dev *dev = p->pdds[i]->dev;
1070
1071                 mutex_lock(kfd_get_dbgmgr_mutex());
1072                 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
1073                         if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
1074                                 kfd_dbgmgr_destroy(dev->dbgmgr);
1075                                 dev->dbgmgr = NULL;
1076                         }
1077                 }
1078                 mutex_unlock(kfd_get_dbgmgr_mutex());
1079         }
1080
1081         kfd_process_dequeue_from_all_devices(p);
1082         pqm_uninit(&p->pqm);
1083
1084         /* Indicate to other users that MM is no longer valid */
1085         p->mm = NULL;
1086         /* Signal the eviction fence after user mode queues are
1087          * destroyed. This allows any BOs to be freed without
1088          * triggering pointless evictions or waiting for fences.
1089          */
1090         dma_fence_signal(p->ef);
1091
1092         mutex_unlock(&p->mutex);
1093
1094         mmu_notifier_put(&p->mmu_notifier);
1095 }
1096
1097 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
1098         .release = kfd_process_notifier_release,
1099         .alloc_notifier = kfd_process_alloc_notifier,
1100         .free_notifier = kfd_process_free_notifier,
1101 };
1102
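/* For devices without a dGPU CWSR base, map the CWSR trap handler area into
 * the user address space via the reserved-memory mmap and copy the trap
 * handler ISA into it.
 */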
1103 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
1104 {
1105         unsigned long  offset;
1106         int i;
1107
1108         for (i = 0; i < p->n_pdds; i++) {
1109                 struct kfd_dev *dev = p->pdds[i]->dev;
1110                 struct qcm_process_device *qpd = &p->pdds[i]->qpd;
1111
1112                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
1113                         continue;
1114
1115                 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
1116                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
1117                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
1118                         MAP_SHARED, offset);
1119
1120                 if (IS_ERR_VALUE(qpd->tba_addr)) {
1121                         int err = qpd->tba_addr;
1122
1123                         pr_err("Failure to set tba address. error %d.\n", err);
1124                         qpd->tba_addr = 0;
1125                         qpd->cwsr_kaddr = NULL;
1126                         return err;
1127                 }
1128
1129                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1130
1131                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1132                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1133                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1134         }
1135
1136         return 0;
1137 }
1138
1139 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
1140 {
1141         struct kfd_dev *dev = pdd->dev;
1142         struct qcm_process_device *qpd = &pdd->qpd;
1143         uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
1144                         | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
1145                         | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1146         void *kaddr;
1147         int ret;
1148
1149         if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
1150                 return 0;
1151
1152         /* cwsr_base is only set for dGPU */
1153         ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
1154                                       KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
1155         if (ret)
1156                 return ret;
1157
1158         qpd->cwsr_kaddr = kaddr;
1159         qpd->tba_addr = qpd->cwsr_base;
1160
1161         memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
1162
1163         qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
1164         pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
1165                  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
1166
1167         return 0;
1168 }
1169
1170 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
1171                                   uint64_t tba_addr,
1172                                   uint64_t tma_addr)
1173 {
1174         if (qpd->cwsr_kaddr) {
1175                 /* KFD trap handler is bound, record as second-level TBA/TMA
1176                  * in first-level TMA. First-level trap will jump to second.
1177                  */
1178                 uint64_t *tma =
1179                         (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1180                 tma[0] = tba_addr;
1181                 tma[1] = tma_addr;
1182         } else {
1183                 /* No trap handler bound, bind as first-level TBA/TMA. */
1184                 qpd->tba_addr = tba_addr;
1185                 qpd->tma_addr = tma_addr;
1186         }
1187 }
1188
1189 /*
1190  * On return the kfd_process is fully operational and will be freed when the
1191  * mm is released
1192  */
1193 static struct kfd_process *create_process(const struct task_struct *thread)
1194 {
1195         struct kfd_process *process;
1196         struct mmu_notifier *mn;
1197         int err = -ENOMEM;
1198
1199         process = kzalloc(sizeof(*process), GFP_KERNEL);
1200         if (!process)
1201                 goto err_alloc_process;
1202
1203         kref_init(&process->ref);
1204         mutex_init(&process->mutex);
1205         process->mm = thread->mm;
1206         process->lead_thread = thread->group_leader;
1207         process->n_pdds = 0;
1208         INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
1209         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
1210         process->last_restore_timestamp = get_jiffies_64();
1211         kfd_event_init_process(process);
1212         process->is_32bit_user_mode = in_compat_syscall();
1213
1214         process->pasid = kfd_pasid_alloc();
1215         if (process->pasid == 0)
1216                 goto err_alloc_pasid;
1217
1218         err = pqm_init(&process->pqm, process);
1219         if (err != 0)
1220                 goto err_process_pqm_init;
1221
1222         /* Init process apertures */
1223         err = kfd_init_apertures(process);
1224         if (err != 0)
1225                 goto err_init_apertures;
1226
1227         /* alloc_notifier needs to find the process in the hash table */
1228         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
1229                         (uintptr_t)process->mm);
1230
1231         /* MMU notifier registration must be the last call that can fail
1232          * because after this point we cannot unwind the process creation.
1233          * After this point, mmu_notifier_put will trigger the cleanup by
1234          * dropping the last process reference in the free_notifier.
1235          */
1236         mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
1237         if (IS_ERR(mn)) {
1238                 err = PTR_ERR(mn);
1239                 goto err_register_notifier;
1240         }
1241         BUG_ON(mn != &process->mmu_notifier);
1242
1243         get_task_struct(process->lead_thread);
1244
1245         return process;
1246
1247 err_register_notifier:
1248         hash_del_rcu(&process->kfd_processes);
1249         kfd_process_free_outstanding_kfd_bos(process);
1250         kfd_process_destroy_pdds(process);
1251 err_init_apertures:
1252         pqm_uninit(&process->pqm);
1253 err_process_pqm_init:
1254         kfd_pasid_free(process->pasid);
1255 err_alloc_pasid:
1256         mutex_destroy(&process->mutex);
1257         kfree(process);
1258 err_alloc_process:
1259         return ERR_PTR(err);
1260 }
1261
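/* On SOC15 ASICs, mark the doorbells reserved for SDMA/IH/VCN, and their
 * mirrored range at KFD_QUEUE_DOORBELL_MIRROR_OFFSET, as unavailable in the
 * per-process doorbell bitmap.
 */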
1262 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
1263                         struct kfd_dev *dev)
1264 {
1265         unsigned int i;
1266         int range_start = dev->shared_resources.non_cp_doorbells_start;
1267         int range_end = dev->shared_resources.non_cp_doorbells_end;
1268
1269         if (!KFD_IS_SOC15(dev->device_info->asic_family))
1270                 return 0;
1271
1272         qpd->doorbell_bitmap =
1273                 kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
1274                                      BITS_PER_BYTE), GFP_KERNEL);
1275         if (!qpd->doorbell_bitmap)
1276                 return -ENOMEM;
1277
1278         /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
1279         pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
1280         pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
1281                         range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1282                         range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
1283
1284         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
1285                 if (i >= range_start && i <= range_end) {
1286                         set_bit(i, qpd->doorbell_bitmap);
1287                         set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
1288                                 qpd->doorbell_bitmap);
1289                 }
1290         }
1291
1292         return 0;
1293 }
1294
1295 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
1296                                                         struct kfd_process *p)
1297 {
1298         int i;
1299
1300         for (i = 0; i < p->n_pdds; i++)
1301                 if (p->pdds[i]->dev == dev)
1302                         return p->pdds[i];
1303
1304         return NULL;
1305 }
1306
1307 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
1308                                                         struct kfd_process *p)
1309 {
1310         struct kfd_process_device *pdd = NULL;
1311
1312         if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
1313                 return NULL;
1314         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
1315         if (!pdd)
1316                 return NULL;
1317
1318         if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
1319                 pr_err("Failed to alloc doorbell for pdd\n");
1320                 goto err_free_pdd;
1321         }
1322
1323         if (init_doorbell_bitmap(&pdd->qpd, dev)) {
1324                 pr_err("Failed to init doorbell for process\n");
1325                 goto err_free_pdd;
1326         }
1327
1328         pdd->dev = dev;
1329         INIT_LIST_HEAD(&pdd->qpd.queues_list);
1330         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
1331         pdd->qpd.dqm = dev->dqm;
1332         pdd->qpd.pqm = &p->pqm;
1333         pdd->qpd.evicted = 0;
1334         pdd->qpd.mapped_gws_queue = false;
1335         pdd->process = p;
1336         pdd->bound = PDD_UNBOUND;
1337         pdd->already_dequeued = false;
1338         pdd->runtime_inuse = false;
1339         pdd->vram_usage = 0;
1340         pdd->sdma_past_activity_counter = 0;
1341         atomic64_set(&pdd->evict_duration_counter, 0);
1342         p->pdds[p->n_pdds++] = pdd;
1343
1344         /* Init idr used for memory handle translation */
1345         idr_init(&pdd->alloc_idr);
1346
1347         return pdd;
1348
1349 err_free_pdd:
1350         kfree(pdd);
1351         return NULL;
1352 }
1353
1354 /**
1355  * kfd_process_device_init_vm - Initialize a VM for a process-device
1356  *
1357  * @pdd: The process-device
1358  * @drm_file: Optional pointer to a DRM file descriptor
1359  *
1360  * If @drm_file is specified, it will be used to acquire the VM from
1361  * that file descriptor. If successful, the @pdd takes ownership of
1362  * the file descriptor.
1363  *
1364  * If @drm_file is NULL, a new VM is created.
1365  *
1366  * Returns 0 on success, -errno on failure.
1367  */
1368 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
1369                                struct file *drm_file)
1370 {
1371         struct kfd_process *p;
1372         struct kfd_dev *dev;
1373         int ret;
1374
1375         if (!drm_file)
1376                 return -EINVAL;
1377
1378         if (pdd->vm)
1379                 return -EBUSY;
1380
1381         p = pdd->process;
1382         dev = pdd->dev;
1383
1384         ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
1385                 dev->kgd, drm_file, p->pasid,
1386                 &pdd->vm, &p->kgd_process_info, &p->ef);
1387         if (ret) {
1388                 pr_err("Failed to create process VM object\n");
1389                 return ret;
1390         }
1391
1392         amdgpu_vm_set_task_info(pdd->vm);
1393
1394         ret = kfd_process_device_reserve_ib_mem(pdd);
1395         if (ret)
1396                 goto err_reserve_ib_mem;
1397         ret = kfd_process_device_init_cwsr_dgpu(pdd);
1398         if (ret)
1399                 goto err_init_cwsr;
1400
1401         pdd->drm_file = drm_file;
1402
1403         return 0;
1404
1405 err_init_cwsr:
1406 err_reserve_ib_mem:
1407         kfd_process_device_free_bos(pdd);
1408         pdd->vm = NULL;
1409
1410         return ret;
1411 }
1412
1413 /*
1414  * Direct the IOMMU to bind the process (specifically the pasid->mm)
1415  * to the device.
1416  * Unbinding occurs when the process dies or the device is removed.
1417  *
1418  * Assumes that the process lock is held.
1419  */
1420 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
1421                                                         struct kfd_process *p)
1422 {
1423         struct kfd_process_device *pdd;
1424         int err;
1425
1426         pdd = kfd_get_process_device_data(dev, p);
1427         if (!pdd) {
1428                 pr_err("Process device data doesn't exist\n");
1429                 return ERR_PTR(-ENOMEM);
1430         }
1431
1432         if (!pdd->vm)
1433                 return ERR_PTR(-ENODEV);
1434
1435         /*
1436          * Signal the runtime-PM system to auto-resume and to prevent
1437          * further runtime suspend from the time the device pdd is
1438          * created until the pdd is destroyed.
1439          */
1440         if (!pdd->runtime_inuse) {
1441                 err = pm_runtime_get_sync(dev->ddev->dev);
1442                 if (err < 0) {
1443                         pm_runtime_put_autosuspend(dev->ddev->dev);
1444                         return ERR_PTR(err);
1445                 }
1446         }
1447
1448         err = kfd_iommu_bind_process_to_device(pdd);
1449         if (err)
1450                 goto out;
1451
1452         /*
1453          * make sure that runtime_usage counter is incremented just once
1454          * per pdd
1455          */
1456         pdd->runtime_inuse = true;
1457
1458         return pdd;
1459
1460 out:
1461         /* balance runpm reference count and exit with error */
1462         if (!pdd->runtime_inuse) {
1463                 pm_runtime_mark_last_busy(dev->ddev->dev);
1464                 pm_runtime_put_autosuspend(dev->ddev->dev);
1465         }
1466
1467         return ERR_PTR(err);
1468 }
1469
1470 /* Create specific handle mapped to mem from process local memory idr
1471  * Assumes that the process lock is held.
1472  */
1473 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
1474                                         void *mem)
1475 {
1476         return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
1477 }
1478
1479 /* Translate specific handle from process local memory idr
1480  * Assumes that the process lock is held.
1481  */
1482 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
1483                                         int handle)
1484 {
1485         if (handle < 0)
1486                 return NULL;
1487
1488         return idr_find(&pdd->alloc_idr, handle);
1489 }
1490
1491 /* Remove specific handle from process local memory idr
1492  * Assumes that the process lock is held.
1493  */
1494 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
1495                                         int handle)
1496 {
1497         if (handle >= 0)
1498                 idr_remove(&pdd->alloc_idr, handle);
1499 }
1500
1501 /* This increments the process->ref counter. */
1502 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
1503 {
1504         struct kfd_process *p, *ret_p = NULL;
1505         unsigned int temp;
1506
1507         int idx = srcu_read_lock(&kfd_processes_srcu);
1508
1509         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1510                 if (p->pasid == pasid) {
1511                         kref_get(&p->ref);
1512                         ret_p = p;
1513                         break;
1514                 }
1515         }
1516
1517         srcu_read_unlock(&kfd_processes_srcu, idx);
1518
1519         return ret_p;
1520 }
1521
1522 /* This increments the process->ref counter. */
1523 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
1524 {
1525         struct kfd_process *p;
1526
1527         int idx = srcu_read_lock(&kfd_processes_srcu);
1528
1529         p = find_process_by_mm(mm);
1530         if (p)
1531                 kref_get(&p->ref);
1532
1533         srcu_read_unlock(&kfd_processes_srcu, idx);
1534
1535         return p;
1536 }
1537
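/*
 * Illustrative sketch (hypothetical caller): both lookups return the process
 * with an extra reference taken, so the caller is expected to drop it with
 * kfd_unref_process() when done, e.g.:
 *
 *	p = kfd_lookup_process_by_pasid(pasid);
 *	if (!p)
 *		return -ESRCH;
 *	...
 *	kfd_unref_process(p);
 */
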
1538 /* kfd_process_evict_queues - Evict all user queues of a process
1539  *
1540  * Eviction is reference-counted per process-device. This means multiple
1541  * evictions from different sources can be nested safely.
1542  */
1543 int kfd_process_evict_queues(struct kfd_process *p)
1544 {
1545         int r = 0;
1546         int i;
1547         unsigned int n_evicted = 0;
1548
1549         for (i = 0; i < p->n_pdds; i++) {
1550                 struct kfd_process_device *pdd = p->pdds[i];
1551
1552                 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
1553                                                             &pdd->qpd);
1554                 if (r) {
1555                         pr_err("Failed to evict process queues\n");
1556                         goto fail;
1557                 }
1558                 n_evicted++;
1559         }
1560
1561         return r;
1562
1563 fail:
1564         /* To keep state consistent, roll back partial eviction by
1565          * restoring queues
1566          */
1567         for (i = 0; i < p->n_pdds; i++) {
1568                 struct kfd_process_device *pdd = p->pdds[i];
1569
1570                 if (n_evicted == 0)
1571                         break;
1572                 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1573                                                               &pdd->qpd))
1574                         pr_err("Failed to restore queues\n");
1575
1576                 n_evicted--;
1577         }
1578
1579         return r;
1580 }
1581
1582 /* kfd_process_restore_queues - Restore all user queues of a process */
1583 int kfd_process_restore_queues(struct kfd_process *p)
1584 {
1585         int r, ret = 0;
1586         int i;
1587
1588         for (i = 0; i < p->n_pdds; i++) {
1589                 struct kfd_process_device *pdd = p->pdds[i];
1590
1591                 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
1592                                                               &pdd->qpd);
1593                 if (r) {
1594                         pr_err("Failed to restore process queues\n");
1595                         if (!ret)
1596                                 ret = r;
1597                 }
1598         }
1599
1600         return ret;
1601 }
1602
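/*
 * Illustrative sketch (not taken from this file): because eviction is
 * reference-counted per process-device, independent triggers can nest as long
 * as every evict is paired with a matching restore, e.g.:
 *
 *	kfd_process_evict_queues(p);    (trigger A)
 *	kfd_process_evict_queues(p);    (trigger B, queues stay evicted)
 *	kfd_process_restore_queues(p);  (undoes B, still evicted)
 *	kfd_process_restore_queues(p);  (undoes A, queues run again)
 */
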
1603 static void evict_process_worker(struct work_struct *work)
1604 {
1605         int ret;
1606         struct kfd_process *p;
1607         struct delayed_work *dwork;
1608
1609         dwork = to_delayed_work(work);
1610
1611         /* Process termination destroys this work item, so for the
1612          * lifetime of this work item the kfd_process p remains valid.
1613          */
1614         p = container_of(dwork, struct kfd_process, eviction_work);
1615         WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
1616                   "Eviction fence mismatch\n");
1617
1618         /* A narrow window of overlap between the restore and evict work
1619          * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
1620          * unreserves the KFD BOs, the process can be evicted again while
1621          * the restore still has a few steps left to finish. So wait for
1622          * any previous restore work to complete first.
1623          */
1624         flush_delayed_work(&p->restore_work);
1625
1626         pr_debug("Started evicting pasid 0x%x\n", p->pasid);
1627         ret = kfd_process_evict_queues(p);
1628         if (!ret) {
1629                 dma_fence_signal(p->ef);
1630                 dma_fence_put(p->ef);
1631                 p->ef = NULL;
1632                 queue_delayed_work(kfd_restore_wq, &p->restore_work,
1633                                 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
1634
1635                 pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
1636         } else
1637                 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
1638 }
1639
1640 static void restore_process_worker(struct work_struct *work)
1641 {
1642         struct delayed_work *dwork;
1643         struct kfd_process *p;
1644         int ret = 0;
1645
1646         dwork = to_delayed_work(work);
1647
1648         /* Process termination destroys this work item, so for the
1649          * lifetime of this work item the kfd_process p remains valid.
1650          */
1651         p = container_of(dwork, struct kfd_process, restore_work);
1652         pr_debug("Started restoring pasid 0x%x\n", p->pasid);
1653
1654         /* Set last_restore_timestamp before the restore has actually
1655          * succeeded. Otherwise it would have to be set by KGD
1656          * (restore_process_bos) before the KFD BOs are unreserved; if not,
1657          * the process could be evicted again before the timestamp is set.
1658          * If the restore fails, the timestamp is simply set again on the
1659          * next attempt. This means the minimum GPU quantum is
1660          * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
1661          * following two functions.
1662          */
1663
1664         p->last_restore_timestamp = get_jiffies_64();
1665         ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
1666                                                      &p->ef);
1667         if (ret) {
1668                 pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
1669                          p->pasid, PROCESS_BACK_OFF_TIME_MS);
1670                 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
1671                                 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
1672                 WARN(!ret, "reschedule restore work failed\n");
1673                 return;
1674         }
1675
1676         ret = kfd_process_restore_queues(p);
1677         if (!ret)
1678                 pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
1679         else
1680                 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
1681 }
1682
1683 void kfd_suspend_all_processes(void)
1684 {
1685         struct kfd_process *p;
1686         unsigned int temp;
1687         int idx = srcu_read_lock(&kfd_processes_srcu);
1688
1689         WARN(debug_evictions, "Evicting all processes");
1690         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1691                 cancel_delayed_work_sync(&p->eviction_work);
1692                 cancel_delayed_work_sync(&p->restore_work);
1693
1694                 if (kfd_process_evict_queues(p))
1695                         pr_err("Failed to suspend process 0x%x\n", p->pasid);
1696                 dma_fence_signal(p->ef);
1697                 dma_fence_put(p->ef);
1698                 p->ef = NULL;
1699         }
1700         srcu_read_unlock(&kfd_processes_srcu, idx);
1701 }
1702
1703 int kfd_resume_all_processes(void)
1704 {
1705         struct kfd_process *p;
1706         unsigned int temp;
1707         int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
1708
1709         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1710                 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
1711                         pr_err("Restore process %d failed during resume\n",
1712                                p->pasid);
1713                         ret = -EFAULT;
1714                 }
1715         }
1716         srcu_read_unlock(&kfd_processes_srcu, idx);
1717         return ret;
1718 }
1719
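/*
 * Illustrative sketch (hypothetical reset path): a caller would bracket a
 * device reset with these two calls so every process is evicted before the
 * hardware goes down and queued for restore afterwards:
 *
 *	kfd_suspend_all_processes();
 *	... reset and reinitialize the hardware ...
 *	if (kfd_resume_all_processes())
 *		pr_err("Failed to resume KFD processes\n");
 */
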
1720 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
1721                           struct vm_area_struct *vma)
1722 {
1723         struct kfd_process_device *pdd;
1724         struct qcm_process_device *qpd;
1725
1726         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
1727                 pr_err("Incorrect CWSR mapping size.\n");
1728                 return -EINVAL;
1729         }
1730
1731         pdd = kfd_get_process_device_data(dev, process);
1732         if (!pdd)
1733                 return -EINVAL;
1734         qpd = &pdd->qpd;
1735
1736         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1737                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
1738         if (!qpd->cwsr_kaddr) {
1739                 pr_err("Error allocating per process CWSR buffer.\n");
1740                 return -ENOMEM;
1741         }
1742
1743         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
1744                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
1745         /* Mapping pages to user process */
1746         return remap_pfn_range(vma, vma->vm_start,
1747                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
1748                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
1749 }
1750
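/*
 * Illustrative sketch (userspace side; names, flags and offset are
 * assumptions): the runtime maps the CWSR trap-handler area with an mmap
 * whose length must be exactly KFD_CWSR_TBA_TMA_SIZE, e.g.:
 *
 *	addr = mmap(NULL, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, kfd_fd, cwsr_mmap_offset);
 */
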
1751 void kfd_flush_tlb(struct kfd_process_device *pdd)
1752 {
1753         struct kfd_dev *dev = pdd->dev;
1754
1755         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1756                 /* Nothing to flush until a VMID is assigned, which
1757                  * only happens when the first queue is created.
1758                  */
1759                 if (pdd->qpd.vmid)
1760                         amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
1761                                                         pdd->qpd.vmid);
1762         } else {
1763                 amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
1764                                                 pdd->process->pasid);
1765         }
1766 }
1767
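/*
 * Illustrative sketch (hypothetical caller and helper): the TLB flush is
 * typically issued after the process's GPU page-table mappings on this
 * device have changed, e.g.:
 *
 *	err = update_gpuvm_mappings(pdd);   (hypothetical helper)
 *	if (!err)
 *		kfd_flush_tlb(pdd);
 */
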
1768 #if defined(CONFIG_DEBUG_FS)
1769
1770 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
1771 {
1772         struct kfd_process *p;
1773         unsigned int temp;
1774         int r = 0;
1775
1776         int idx = srcu_read_lock(&kfd_processes_srcu);
1777
1778         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1779                 seq_printf(m, "Process %d PASID 0x%x:\n",
1780                            p->lead_thread->tgid, p->pasid);
1781
1782                 mutex_lock(&p->mutex);
1783                 r = pqm_debugfs_mqds(m, &p->pqm);
1784                 mutex_unlock(&p->mutex);
1785
1786                 if (r)
1787                         break;
1788         }
1789
1790         srcu_read_unlock(&kfd_processes_srcu, idx);
1791
1792         return r;
1793 }
1794
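/*
 * Illustrative note (path assumed from the usual KFD debugfs layout): with
 * CONFIG_DEBUG_FS enabled this dump can be read from userspace, e.g.:
 *
 *	cat /sys/kernel/debug/kfd/mqds
 */
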
1795 #endif
1796