[linux.git] / drivers / gpu / drm / amd / amdkfd / kfd_process.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/mutex.h>
24 #include <linux/log2.h>
25 #include <linux/sched.h>
26 #include <linux/sched/mm.h>
27 #include <linux/sched/task.h>
28 #include <linux/slab.h>
29 #include <linux/amd-iommu.h>
30 #include <linux/notifier.h>
31 #include <linux/compat.h>
32 #include <linux/mman.h>
33 #include <linux/file.h>
34
35 struct mm_struct;
36
37 #include "kfd_priv.h"
38 #include "kfd_device_queue_manager.h"
39 #include "kfd_dbgmgr.h"
40 #include "kfd_iommu.h"
41
42 /*
43  * List of struct kfd_process (field kfd_process).
44  * Unique/indexed by mm_struct*
45  */
46 DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
47 static DEFINE_MUTEX(kfd_processes_mutex);
48
49 DEFINE_SRCU(kfd_processes_srcu);
50
51 /* For process termination handling */
52 static struct workqueue_struct *kfd_process_wq;
53
54 /* Ordered, single-threaded workqueue for restoring evicted
55  * processes. Restoring multiple processes concurrently under memory
56  * pressure can lead to processes blocking each other from validating
57  * their BOs and result in a live-lock situation where processes
58  * remain evicted indefinitely.
59  */
60 static struct workqueue_struct *kfd_restore_wq;
61
62 static struct kfd_process *find_process(const struct task_struct *thread);
63 static void kfd_process_ref_release(struct kref *ref);
64 static struct kfd_process *create_process(const struct task_struct *thread,
65                                         struct file *filep);
66
67 static void evict_process_worker(struct work_struct *work);
68 static void restore_process_worker(struct work_struct *work);
69
70
71 int kfd_process_create_wq(void)
72 {
73         if (!kfd_process_wq)
74                 kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
75         if (!kfd_restore_wq)
76                 kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
77
78         if (!kfd_process_wq || !kfd_restore_wq) {
79                 kfd_process_destroy_wq();
80                 return -ENOMEM;
81         }
82
83         return 0;
84 }
85
86 void kfd_process_destroy_wq(void)
87 {
88         if (kfd_process_wq) {
89                 destroy_workqueue(kfd_process_wq);
90                 kfd_process_wq = NULL;
91         }
92         if (kfd_restore_wq) {
93                 destroy_workqueue(kfd_restore_wq);
94                 kfd_restore_wq = NULL;
95         }
96 }
97
98 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
99                         struct kfd_process_device *pdd)
100 {
101         struct kfd_dev *dev = pdd->dev;
102
103         dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
104         dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
105 }
106
107 /* kfd_process_alloc_gpuvm - Allocate GPU VM memory for the KFD process
108  *      This function should only be called right after the process
109  *      is created and while kfd_processes_mutex is still held, to
110  *      avoid concurrency. Because of this exclusivity, we do not
111  *      need to take p->mutex.
112  */
113 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
114                                    uint64_t gpu_va, uint32_t size,
115                                    uint32_t flags, void **kptr)
116 {
117         struct kfd_dev *kdev = pdd->dev;
118         struct kgd_mem *mem = NULL;
119         int handle;
120         int err;
121
122         err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
123                                                  pdd->vm, &mem, NULL, flags);
124         if (err)
125                 goto err_alloc_mem;
126
127         err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
128         if (err)
129                 goto err_map_mem;
130
131         err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
132         if (err) {
133                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
134                 goto sync_memory_failed;
135         }
136
137         /* Create an obj handle so kfd_process_device_remove_obj_handle
138          * will take care of the bo removal when the process finishes.
139          * We do not need to take p->mutex, because the process is just
140          * created and the ioctls have not had the chance to run.
141          */
142         handle = kfd_process_device_create_obj_handle(pdd, mem);
143
144         if (handle < 0) {
145                 err = handle;
146                 goto free_gpuvm;
147         }
148
149         if (kptr) {
150                 err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
151                                 (struct kgd_mem *)mem, kptr, NULL);
152                 if (err) {
153                         pr_debug("Map GTT BO to kernel failed\n");
154                         goto free_obj_handle;
155                 }
156         }
157
158         return err;
159
160 free_obj_handle:
161         kfd_process_device_remove_obj_handle(pdd, handle);
162 free_gpuvm:
163 sync_memory_failed:
164         kfd_process_free_gpuvm(mem, pdd);
165         return err;
166
167 err_map_mem:
168         kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
169 err_alloc_mem:
170         *kptr = NULL;
171         return err;
172 }
173
174 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
175  *      process for IB usage. The memory reserved is used by KFD to
176  *      submit IBs to AMDGPU from the kernel. If the memory is
177  *      reserved successfully, ib_kaddr will hold the CPU/kernel
178  *      address. Check ib_kaddr before accessing the memory.
179  */
180 static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
181 {
182         struct qcm_process_device *qpd = &pdd->qpd;
183         uint32_t flags = ALLOC_MEM_FLAGS_GTT |
184                          ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
185                          ALLOC_MEM_FLAGS_WRITABLE |
186                          ALLOC_MEM_FLAGS_EXECUTABLE;
187         void *kaddr;
188         int ret;
189
190         if (qpd->ib_kaddr || !qpd->ib_base)
191                 return 0;
192
193         /* ib_base is only set for dGPU */
194         ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
195                                       &kaddr);
196         if (ret)
197                 return ret;
198
199         qpd->ib_kaddr = kaddr;
200
201         return 0;
202 }
203
204 struct kfd_process *kfd_create_process(struct file *filep)
205 {
206         struct kfd_process *process;
207         struct task_struct *thread = current;
208
209         if (!thread->mm)
210                 return ERR_PTR(-EINVAL);
211
212         /* Only the pthreads threading model is supported. */
213         if (thread->group_leader->mm != thread->mm)
214                 return ERR_PTR(-EINVAL);
215
216         /*
217          * Take the kfd processes mutex before starting process creation
218          * so that two threads of the same process cannot create two
219          * kfd_process structures.
220          */
221         mutex_lock(&kfd_processes_mutex);
222
223         /* A prior open of /dev/kfd could have already created the process. */
224         process = find_process(thread);
225         if (process)
226                 pr_debug("Process already found\n");
227         else
228                 process = create_process(thread, filep);
229
230         mutex_unlock(&kfd_processes_mutex);
231
232         return process;
233 }
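
/*
 * Illustrative sketch (editorial, not part of the original file): the
 * character-device open handler is expected to create or look up the
 * per-process state roughly as below. The handler name and error
 * handling are assumptions for illustration only.
 *
 *	static int example_kfd_open(struct inode *inode, struct file *filep)
 *	{
 *		struct kfd_process *process = kfd_create_process(filep);
 *
 *		if (IS_ERR(process))
 *			return PTR_ERR(process);
 *
 *		// Later ioctls find the same kfd_process again via
 *		// kfd_get_process(current).
 *		return 0;
 *	}
 */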
234
235 struct kfd_process *kfd_get_process(const struct task_struct *thread)
236 {
237         struct kfd_process *process;
238
239         if (!thread->mm)
240                 return ERR_PTR(-EINVAL);
241
242         /* Only the pthreads threading model is supported. */
243         if (thread->group_leader->mm != thread->mm)
244                 return ERR_PTR(-EINVAL);
245
246         process = find_process(thread);
247
248         return process;
249 }
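
/*
 * Illustrative sketch (assumption): ioctl handlers typically resolve the
 * calling task to its kfd_process like this before doing per-process
 * work. In this version the function can return NULL (process not found)
 * as well as an ERR_PTR, so a cautious caller checks both:
 *
 *	struct kfd_process *p = kfd_get_process(current);
 *
 *	if (IS_ERR_OR_NULL(p))
 *		return p ? PTR_ERR(p) : -EINVAL;
 *	// ... take p->mutex as needed and operate on p ...
 */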
250
251 static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
252 {
253         struct kfd_process *process;
254
255         hash_for_each_possible_rcu(kfd_processes_table, process,
256                                         kfd_processes, (uintptr_t)mm)
257                 if (process->mm == mm)
258                         return process;
259
260         return NULL;
261 }
262
263 static struct kfd_process *find_process(const struct task_struct *thread)
264 {
265         struct kfd_process *p;
266         int idx;
267
268         idx = srcu_read_lock(&kfd_processes_srcu);
269         p = find_process_by_mm(thread->mm);
270         srcu_read_unlock(&kfd_processes_srcu, idx);
271
272         return p;
273 }
274
275 void kfd_unref_process(struct kfd_process *p)
276 {
277         kref_put(&p->ref, kfd_process_ref_release);
278 }
279
280 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
281 {
282         struct kfd_process *p = pdd->process;
283         void *mem;
284         int id;
285
286         /*
287          * Remove all handles from idr and release appropriate
288          * local memory object
289          */
290         idr_for_each_entry(&pdd->alloc_idr, mem, id) {
291                 struct kfd_process_device *peer_pdd;
292
293                 list_for_each_entry(peer_pdd, &p->per_device_data,
294                                     per_device_list) {
295                         if (!peer_pdd->vm)
296                                 continue;
297                         peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
298                                 peer_pdd->dev->kgd, mem, peer_pdd->vm);
299                 }
300
301                 pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
302                 kfd_process_device_remove_obj_handle(pdd, id);
303         }
304 }
305
306 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
307 {
308         struct kfd_process_device *pdd;
309
310         list_for_each_entry(pdd, &p->per_device_data, per_device_list)
311                 kfd_process_device_free_bos(pdd);
312 }
313
314 static void kfd_process_destroy_pdds(struct kfd_process *p)
315 {
316         struct kfd_process_device *pdd, *temp;
317
318         list_for_each_entry_safe(pdd, temp, &p->per_device_data,
319                                  per_device_list) {
320                 pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
321                                 pdd->dev->id, p->pasid);
322
323                 if (pdd->drm_file)
324                         fput(pdd->drm_file);
325                 else if (pdd->vm)
326                         pdd->dev->kfd2kgd->destroy_process_vm(
327                                 pdd->dev->kgd, pdd->vm);
328
329                 list_del(&pdd->per_device_list);
330
331                 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
332                         free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
333                                 get_order(KFD_CWSR_TBA_TMA_SIZE));
334
335                 kfree(pdd->qpd.doorbell_bitmap);
336                 idr_destroy(&pdd->alloc_idr);
337
338                 kfree(pdd);
339         }
340 }
341
342 /* No process locking is needed in this function, because the process
343  * is no longer findable. We must assume that no other thread is
344  * still using it; otherwise we couldn't safely free the process
345  * structure in the end.
346  */
347 static void kfd_process_wq_release(struct work_struct *work)
348 {
349         struct kfd_process *p = container_of(work, struct kfd_process,
350                                              release_work);
351
352         kfd_iommu_unbind_process(p);
353
354         kfd_process_free_outstanding_kfd_bos(p);
355
356         kfd_process_destroy_pdds(p);
357         dma_fence_put(p->ef);
358
359         kfd_event_free_process(p);
360
361         kfd_pasid_free(p->pasid);
362         kfd_free_process_doorbells(p);
363
364         mutex_destroy(&p->mutex);
365
366         put_task_struct(p->lead_thread);
367
368         kfree(p);
369 }
370
371 static void kfd_process_ref_release(struct kref *ref)
372 {
373         struct kfd_process *p = container_of(ref, struct kfd_process, ref);
374
375         INIT_WORK(&p->release_work, kfd_process_wq_release);
376         queue_work(kfd_process_wq, &p->release_work);
377 }
378
379 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
380 {
381         struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
382
383         kfd_unref_process(p);
384 }
385
386 static void kfd_process_notifier_release(struct mmu_notifier *mn,
387                                         struct mm_struct *mm)
388 {
389         struct kfd_process *p;
390         struct kfd_process_device *pdd = NULL;
391
392         /*
393          * The kfd_process structure cannot be freed because the
394          * mmu_notifier SRCU is read-locked.
395          */
396         p = container_of(mn, struct kfd_process, mmu_notifier);
397         if (WARN_ON(p->mm != mm))
398                 return;
399
400         mutex_lock(&kfd_processes_mutex);
401         hash_del_rcu(&p->kfd_processes);
402         mutex_unlock(&kfd_processes_mutex);
403         synchronize_srcu(&kfd_processes_srcu);
404
405         cancel_delayed_work_sync(&p->eviction_work);
406         cancel_delayed_work_sync(&p->restore_work);
407
408         mutex_lock(&p->mutex);
409
410         /* Iterate over all process device data structures. If a pdd is
411          * in debug mode, force unregistration first so that its queues
412          * can then be destroyed.
413          */
414         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
415                 struct kfd_dev *dev = pdd->dev;
416
417                 mutex_lock(kfd_get_dbgmgr_mutex());
418                 if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
419                         if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
420                                 kfd_dbgmgr_destroy(dev->dbgmgr);
421                                 dev->dbgmgr = NULL;
422                         }
423                 }
424                 mutex_unlock(kfd_get_dbgmgr_mutex());
425         }
426
427         kfd_process_dequeue_from_all_devices(p);
428         pqm_uninit(&p->pqm);
429
430         /* Indicate to other users that MM is no longer valid */
431         p->mm = NULL;
432
433         mutex_unlock(&p->mutex);
434
435         mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
436         mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
437 }
438
439 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
440         .release = kfd_process_notifier_release,
441 };
442
443 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
444 {
445         unsigned long  offset;
446         struct kfd_process_device *pdd;
447
448         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
449                 struct kfd_dev *dev = pdd->dev;
450                 struct qcm_process_device *qpd = &pdd->qpd;
451
452                 if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
453                         continue;
454
455                 offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
456                         << PAGE_SHIFT;
457                 qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
458                         KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
459                         MAP_SHARED, offset);
460
461                 if (IS_ERR_VALUE(qpd->tba_addr)) {
462                         int err = qpd->tba_addr;
463
464                         pr_err("Failure to set tba address. error %d.\n", err);
465                         qpd->tba_addr = 0;
466                         qpd->cwsr_kaddr = NULL;
467                         return err;
468                 }
469
470                 memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
471
472                 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
473                 pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
474                         qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
475         }
476
477         return 0;
478 }
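
/*
 * Editorial note (hedged): the vm_mmap() offset built above packs the
 * mapping type and GPU ID into the upper bits of the file offset:
 *
 *	offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
 *			<< PAGE_SHIFT;
 *
 * The driver's mmap entry point (kfd_mmap() in kfd_chardev.c) is expected
 * to decode this offset and route KFD_MMAP_TYPE_RESERVED_MEM mappings to
 * kfd_reserved_mem_mmap() near the end of this file, which allocates and
 * remaps the CWSR trap-handler pages into the process.
 */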
479
480 static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
481 {
482         struct kfd_dev *dev = pdd->dev;
483         struct qcm_process_device *qpd = &pdd->qpd;
484         uint32_t flags = ALLOC_MEM_FLAGS_GTT |
485                 ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
486         void *kaddr;
487         int ret;
488
489         if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
490                 return 0;
491
492         /* cwsr_base is only set for dGPU */
493         ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
494                                       KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
495         if (ret)
496                 return ret;
497
498         qpd->cwsr_kaddr = kaddr;
499         qpd->tba_addr = qpd->cwsr_base;
500
501         memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
502
503         qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
504         pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
505                  qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
506
507         return 0;
508 }
509
510 static struct kfd_process *create_process(const struct task_struct *thread,
511                                         struct file *filep)
512 {
513         struct kfd_process *process;
514         int err = -ENOMEM;
515
516         process = kzalloc(sizeof(*process), GFP_KERNEL);
517
518         if (!process)
519                 goto err_alloc_process;
520
521         process->pasid = kfd_pasid_alloc();
522         if (process->pasid == 0)
523                 goto err_alloc_pasid;
524
525         if (kfd_alloc_process_doorbells(process) < 0)
526                 goto err_alloc_doorbells;
527
528         kref_init(&process->ref);
529
530         mutex_init(&process->mutex);
531
532         process->mm = thread->mm;
533
534         /* register notifier */
535         process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
536         err = mmu_notifier_register(&process->mmu_notifier, process->mm);
537         if (err)
538                 goto err_mmu_notifier;
539
540         hash_add_rcu(kfd_processes_table, &process->kfd_processes,
541                         (uintptr_t)process->mm);
542
543         process->lead_thread = thread->group_leader;
544         get_task_struct(process->lead_thread);
545
546         INIT_LIST_HEAD(&process->per_device_data);
547
548         kfd_event_init_process(process);
549
550         err = pqm_init(&process->pqm, process);
551         if (err != 0)
552                 goto err_process_pqm_init;
553
554         /* Init process apertures */
555         process->is_32bit_user_mode = in_compat_syscall();
556         err = kfd_init_apertures(process);
557         if (err != 0)
558                 goto err_init_apertures;
559
560         INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
561         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
562         process->last_restore_timestamp = get_jiffies_64();
563
564         err = kfd_process_init_cwsr_apu(process, filep);
565         if (err)
566                 goto err_init_cwsr;
567
568         return process;
569
570 err_init_cwsr:
571         kfd_process_free_outstanding_kfd_bos(process);
572         kfd_process_destroy_pdds(process);
573 err_init_apertures:
574         pqm_uninit(&process->pqm);
575 err_process_pqm_init:
576         hash_del_rcu(&process->kfd_processes);
577         synchronize_rcu();
578         mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
579 err_mmu_notifier:
580         mutex_destroy(&process->mutex);
581         kfd_free_process_doorbells(process);
582 err_alloc_doorbells:
583         kfd_pasid_free(process->pasid);
584 err_alloc_pasid:
585         kfree(process);
586 err_alloc_process:
587         return ERR_PTR(err);
588 }
589
590 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
591                         struct kfd_dev *dev)
592 {
593         unsigned int i;
594
595         if (!KFD_IS_SOC15(dev->device_info->asic_family))
596                 return 0;
597
598         qpd->doorbell_bitmap =
599                 kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
600                                      BITS_PER_BYTE), GFP_KERNEL);
601         if (!qpd->doorbell_bitmap)
602                 return -ENOMEM;
603
604         /* Mask out any reserved doorbells */
605         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
606                 if ((dev->shared_resources.reserved_doorbell_mask & i) ==
607                     dev->shared_resources.reserved_doorbell_val) {
608                         set_bit(i, qpd->doorbell_bitmap);
609                         pr_debug("reserved doorbell 0x%03x\n", i);
610                 }
611
612         return 0;
613 }
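
/*
 * Worked example (illustrative assumption): with
 * reserved_doorbell_mask == 0x3 and reserved_doorbell_val == 0x2, the
 * test (mask & i) == val above marks every doorbell index whose low two
 * bits are binary 10 (0x2, 0x6, 0xa, ...) as reserved in the bitmap, so
 * that later doorbell allocation can skip those slots.
 */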
614
615 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
616                                                         struct kfd_process *p)
617 {
618         struct kfd_process_device *pdd = NULL;
619
620         list_for_each_entry(pdd, &p->per_device_data, per_device_list)
621                 if (pdd->dev == dev)
622                         return pdd;
623
624         return NULL;
625 }
626
627 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
628                                                         struct kfd_process *p)
629 {
630         struct kfd_process_device *pdd = NULL;
631
632         pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
633         if (!pdd)
634                 return NULL;
635
636         if (init_doorbell_bitmap(&pdd->qpd, dev)) {
637                 pr_err("Failed to init doorbell for process\n");
638                 kfree(pdd);
639                 return NULL;
640         }
641
642         pdd->dev = dev;
643         INIT_LIST_HEAD(&pdd->qpd.queues_list);
644         INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
645         pdd->qpd.dqm = dev->dqm;
646         pdd->qpd.pqm = &p->pqm;
647         pdd->qpd.evicted = 0;
648         pdd->process = p;
649         pdd->bound = PDD_UNBOUND;
650         pdd->already_dequeued = false;
651         list_add(&pdd->per_device_list, &p->per_device_data);
652
653         /* Init idr used for memory handle translation */
654         idr_init(&pdd->alloc_idr);
655
656         return pdd;
657 }
658
659 /**
660  * kfd_process_device_init_vm - Initialize a VM for a process-device
661  *
662  * @pdd: The process-device
663  * @drm_file: Optional pointer to a DRM file descriptor
664  *
665  * If @drm_file is specified, it will be used to acquire the VM from
666  * that file descriptor. If successful, the @pdd takes ownership of
667  * the file descriptor.
668  *
669  * If @drm_file is NULL, a new VM is created.
670  *
671  * Returns 0 on success, -errno on failure.
672  */
673 int kfd_process_device_init_vm(struct kfd_process_device *pdd,
674                                struct file *drm_file)
675 {
676         struct kfd_process *p;
677         struct kfd_dev *dev;
678         int ret;
679
680         if (pdd->vm)
681                 return drm_file ? -EBUSY : 0;
682
683         p = pdd->process;
684         dev = pdd->dev;
685
686         if (drm_file)
687                 ret = dev->kfd2kgd->acquire_process_vm(
688                         dev->kgd, drm_file,
689                         &pdd->vm, &p->kgd_process_info, &p->ef);
690         else
691                 ret = dev->kfd2kgd->create_process_vm(
692                         dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
693         if (ret) {
694                 pr_err("Failed to create process VM object\n");
695                 return ret;
696         }
697
698         ret = kfd_process_device_reserve_ib_mem(pdd);
699         if (ret)
700                 goto err_reserve_ib_mem;
701         ret = kfd_process_device_init_cwsr_dgpu(pdd);
702         if (ret)
703                 goto err_init_cwsr;
704
705         pdd->drm_file = drm_file;
706
707         return 0;
708
709 err_init_cwsr:
710 err_reserve_ib_mem:
711         kfd_process_device_free_bos(pdd);
712         if (!drm_file)
713                 dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
714         pdd->vm = NULL;
715
716         return ret;
717 }
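
/*
 * Illustrative sketch (assumption, not part of this file): the
 * acquire-VM ioctl path would hand a DRM render-node file to this
 * function roughly as follows; the names and error handling are for
 * illustration only.
 *
 *	struct file *drm_file = fget(drm_fd);
 *
 *	if (!drm_file)
 *		return -EINVAL;
 *	err = kfd_process_device_init_vm(pdd, drm_file);
 *	if (err)
 *		fput(drm_file);
 *	// On success the pdd owns drm_file and releases it with fput()
 *	// in kfd_process_destroy_pdds().
 */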
718
719 /*
720  * Direct the IOMMU to bind the process (specifically the pasid->mm)
721  * to the device.
722  * Unbinding occurs when the process dies or the device is removed.
723  *
724  * Assumes that the process lock is held.
725  */
726 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
727                                                         struct kfd_process *p)
728 {
729         struct kfd_process_device *pdd;
730         int err;
731
732         pdd = kfd_get_process_device_data(dev, p);
733         if (!pdd) {
734                 pr_err("Process device data doesn't exist\n");
735                 return ERR_PTR(-ENOMEM);
736         }
737
738         err = kfd_iommu_bind_process_to_device(pdd);
739         if (err)
740                 return ERR_PTR(err);
741
742         err = kfd_process_device_init_vm(pdd, NULL);
743         if (err)
744                 return ERR_PTR(err);
745
746         return pdd;
747 }
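
/*
 * Illustrative sketch (assumption): queue-creation and memory-management
 * ioctls typically call this under the process lock before touching the
 * device:
 *
 *	mutex_lock(&p->mutex);
 *	pdd = kfd_bind_process_to_device(dev, p);
 *	if (IS_ERR(pdd)) {
 *		err = PTR_ERR(pdd);
 *		goto out_unlock;
 *	}
 *	// ... use pdd->qpd, pdd->vm, etc. ...
 */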
748
749 struct kfd_process_device *kfd_get_first_process_device_data(
750                                                 struct kfd_process *p)
751 {
752         return list_first_entry(&p->per_device_data,
753                                 struct kfd_process_device,
754                                 per_device_list);
755 }
756
757 struct kfd_process_device *kfd_get_next_process_device_data(
758                                                 struct kfd_process *p,
759                                                 struct kfd_process_device *pdd)
760 {
761         if (list_is_last(&pdd->per_device_list, &p->per_device_data))
762                 return NULL;
763         return list_next_entry(pdd, per_device_list);
764 }
765
766 bool kfd_has_process_device_data(struct kfd_process *p)
767 {
768         return !(list_empty(&p->per_device_data));
769 }
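
/*
 * Illustrative sketch (assumption): callers that walk every device of a
 * process combine the three helpers above like this:
 *
 *	struct kfd_process_device *pdd;
 *
 *	if (kfd_has_process_device_data(p)) {
 *		pdd = kfd_get_first_process_device_data(p);
 *		do {
 *			// ... per-device work on pdd ...
 *		} while ((pdd = kfd_get_next_process_device_data(p, pdd)));
 *	}
 */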
770
771 /* Create specific handle mapped to mem from process local memory idr
772  * Assumes that the process lock is held.
773  */
774 int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
775                                         void *mem)
776 {
777         return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
778 }
779
780 /* Translate specific handle from process local memory idr
781  * Assumes that the process lock is held.
782  */
783 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
784                                         int handle)
785 {
786         if (handle < 0)
787                 return NULL;
788
789         return idr_find(&pdd->alloc_idr, handle);
790 }
791
792 /* Remove specific handle from process local memory idr
793  * Assumes that the process lock is held.
794  */
795 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
796                                         int handle)
797 {
798         if (handle >= 0)
799                 idr_remove(&pdd->alloc_idr, handle);
800 }
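
/*
 * Illustrative sketch (assumption): a memory-allocation ioctl would tie
 * the three IDR helpers above together roughly like this, with the
 * process lock held:
 *
 *	int handle = kfd_process_device_create_obj_handle(pdd, mem);
 *
 *	if (handle < 0)
 *		return handle;			// IDR allocation failed
 *	// ... later, look the buffer up again by handle ...
 *	mem = kfd_process_device_translate_handle(pdd, handle);
 *	if (!mem)
 *		return -EINVAL;			// stale or invalid handle
 *	// ... and finally drop the mapping on free ...
 *	kfd_process_device_remove_obj_handle(pdd, handle);
 */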
801
802 /* This increments the process->ref counter. */
803 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
804 {
805         struct kfd_process *p, *ret_p = NULL;
806         unsigned int temp;
807
808         int idx = srcu_read_lock(&kfd_processes_srcu);
809
810         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
811                 if (p->pasid == pasid) {
812                         kref_get(&p->ref);
813                         ret_p = p;
814                         break;
815                 }
816         }
817
818         srcu_read_unlock(&kfd_processes_srcu, idx);
819
820         return ret_p;
821 }
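
/*
 * Illustrative sketch (assumption): because this lookup takes a
 * reference, callers that only know a PASID (e.g. interrupt or eviction
 * paths) must drop it again with kfd_unref_process():
 *
 *	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 *
 *	if (!p)
 *		return;		// process already gone
 *	// ... use p ...
 *	kfd_unref_process(p);
 */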
822
823 /* This increments the process->ref counter. */
824 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
825 {
826         struct kfd_process *p;
827
828         int idx = srcu_read_lock(&kfd_processes_srcu);
829
830         p = find_process_by_mm(mm);
831         if (p)
832                 kref_get(&p->ref);
833
834         srcu_read_unlock(&kfd_processes_srcu, idx);
835
836         return p;
837 }
838
839 /* kfd_process_evict_queues - Evict all user queues of a process
840  *
841  * Eviction is reference-counted per process-device. This means multiple
842  * evictions from different sources can be nested safely.
843  */
844 int kfd_process_evict_queues(struct kfd_process *p)
845 {
846         struct kfd_process_device *pdd;
847         int r = 0;
848         unsigned int n_evicted = 0;
849
850         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
851                 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
852                                                             &pdd->qpd);
853                 if (r) {
854                         pr_err("Failed to evict process queues\n");
855                         goto fail;
856                 }
857                 n_evicted++;
858         }
859
860         return r;
861
862 fail:
863         /* To keep state consistent, roll back partial eviction by
864          * restoring queues
865          */
866         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
867                 if (n_evicted == 0)
868                         break;
869                 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
870                                                               &pdd->qpd))
871                         pr_err("Failed to restore queues\n");
872
873                 n_evicted--;
874         }
875
876         return r;
877 }
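
/*
 * Illustrative example (assumption, based on the eviction counter kept in
 * the device queue manager): nested use from independent sources is safe
 * because only the first eviction and the last restore take effect:
 *
 *	kfd_process_evict_queues(p);	// e.g. MMU notifier,  count 0 -> 1
 *	kfd_process_evict_queues(p);	// e.g. GPU suspend,   count 1 -> 2
 *	kfd_process_restore_queues(p);	// count 2 -> 1, still evicted
 *	kfd_process_restore_queues(p);	// count 1 -> 0, queues run again
 */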
878
879 /* kfd_process_restore_queues - Restore all user queues of a process */
880 int kfd_process_restore_queues(struct kfd_process *p)
881 {
882         struct kfd_process_device *pdd;
883         int r, ret = 0;
884
885         list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
886                 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
887                                                               &pdd->qpd);
888                 if (r) {
889                         pr_err("Failed to restore process queues\n");
890                         if (!ret)
891                                 ret = r;
892                 }
893         }
894
895         return ret;
896 }
897
898 static void evict_process_worker(struct work_struct *work)
899 {
900         int ret;
901         struct kfd_process *p;
902         struct delayed_work *dwork;
903
904         dwork = to_delayed_work(work);
905
906         /* Process termination destroys this worker thread. So during the
907          * lifetime of this thread, kfd_process p will be valid
908          */
909         p = container_of(dwork, struct kfd_process, eviction_work);
910         WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
911                   "Eviction fence mismatch\n");
912
913         /* A narrow window of overlap between the restore and evict work
914          * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
915          * unreserves KFD BOs, the process can be evicted again, but
916          * restore still has a few more steps to finish. So wait for any
917          * previous restore work to complete.
918          */
919         flush_delayed_work(&p->restore_work);
920
921         pr_debug("Started evicting pasid %d\n", p->pasid);
922         ret = kfd_process_evict_queues(p);
923         if (!ret) {
924                 dma_fence_signal(p->ef);
925                 dma_fence_put(p->ef);
926                 p->ef = NULL;
927                 queue_delayed_work(kfd_restore_wq, &p->restore_work,
928                                 msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
929
930                 pr_debug("Finished evicting pasid %d\n", p->pasid);
931         } else
932                 pr_err("Failed to evict queues of pasid %d\n", p->pasid);
933 }
934
935 static void restore_process_worker(struct work_struct *work)
936 {
937         struct delayed_work *dwork;
938         struct kfd_process *p;
939         struct kfd_process_device *pdd;
940         int ret = 0;
941
942         dwork = to_delayed_work(work);
943
944         /* Process termination destroys this worker thread. So during the
945          * lifetime of this thread, kfd_process p will be valid
946          */
947         p = container_of(dwork, struct kfd_process, restore_work);
948
949         /* Call restore_process_bos on the first KGD device. This function
950          * takes care of restoring the whole process including other devices.
951          * Restore can fail if not enough memory is available. If so,
952          * reschedule the work.
953          */
954         pdd = list_first_entry(&p->per_device_data,
955                                struct kfd_process_device,
956                                per_device_list);
957
958         pr_debug("Started restoring pasid %d\n", p->pasid);
959
960         /* Set last_restore_timestamp before the restore has actually
961          * succeeded. Otherwise it would have to be set by KGD
962          * (restore_process_bos) before KFD BOs are unreserved; if not,
963          * the process could be evicted again before the timestamp is set.
964          * If restore fails, the timestamp will be set again on the next
965          * attempt. This means the minimum GPU quantum is
966          * PROCESS_ACTIVE_TIME_MS minus the time it takes to execute the
967          * following two functions.
968          */
969
970         p->last_restore_timestamp = get_jiffies_64();
971         ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
972                                                      &p->ef);
973         if (ret) {
974                 pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
975                          p->pasid, PROCESS_BACK_OFF_TIME_MS);
976                 ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
977                                 msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
978                 WARN(!ret, "reschedule restore work failed\n");
979                 return;
980         }
981
982         ret = kfd_process_restore_queues(p);
983         if (!ret)
984                 pr_debug("Finished restoring pasid %d\n", p->pasid);
985         else
986                 pr_err("Failed to restore queues of pasid %d\n", p->pasid);
987 }
988
989 void kfd_suspend_all_processes(void)
990 {
991         struct kfd_process *p;
992         unsigned int temp;
993         int idx = srcu_read_lock(&kfd_processes_srcu);
994
995         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
996                 cancel_delayed_work_sync(&p->eviction_work);
997                 cancel_delayed_work_sync(&p->restore_work);
998
999                 if (kfd_process_evict_queues(p))
1000                         pr_err("Failed to suspend process %d\n", p->pasid);
1001                 dma_fence_signal(p->ef);
1002                 dma_fence_put(p->ef);
1003                 p->ef = NULL;
1004         }
1005         srcu_read_unlock(&kfd_processes_srcu, idx);
1006 }
1007
1008 int kfd_resume_all_processes(void)
1009 {
1010         struct kfd_process *p;
1011         unsigned int temp;
1012         int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
1013
1014         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1015                 if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
1016                         pr_err("Restore process %d failed during resume\n",
1017                                p->pasid);
1018                         ret = -EFAULT;
1019                 }
1020         }
1021         srcu_read_unlock(&kfd_processes_srcu, idx);
1022         return ret;
1023 }
1024
1025 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
1026                           struct vm_area_struct *vma)
1027 {
1028         struct kfd_process_device *pdd;
1029         struct qcm_process_device *qpd;
1030
1031         if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
1032                 pr_err("Incorrect CWSR mapping size.\n");
1033                 return -EINVAL;
1034         }
1035
1036         pdd = kfd_get_process_device_data(dev, process);
1037         if (!pdd)
1038                 return -EINVAL;
1039         qpd = &pdd->qpd;
1040
1041         qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1042                                         get_order(KFD_CWSR_TBA_TMA_SIZE));
1043         if (!qpd->cwsr_kaddr) {
1044                 pr_err("Error allocating per process CWSR buffer.\n");
1045                 return -ENOMEM;
1046         }
1047
1048         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
1049                 | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
1050         /* Map the pages into the user process */
1051         return remap_pfn_range(vma, vma->vm_start,
1052                                PFN_DOWN(__pa(qpd->cwsr_kaddr)),
1053                                KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
1054 }
1055
1056 void kfd_flush_tlb(struct kfd_process_device *pdd)
1057 {
1058         struct kfd_dev *dev = pdd->dev;
1059         const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
1060
1061         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1062                 /* Nothing to flush until a VMID is assigned, which
1063                  * only happens when the first queue is created.
1064                  */
1065                 if (pdd->qpd.vmid)
1066                         f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
1067         } else {
1068                 f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
1069         }
1070 }
1071
1072 #if defined(CONFIG_DEBUG_FS)
1073
1074 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
1075 {
1076         struct kfd_process *p;
1077         unsigned int temp;
1078         int r = 0;
1079
1080         int idx = srcu_read_lock(&kfd_processes_srcu);
1081
1082         hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1083                 seq_printf(m, "Process %d PASID %d:\n",
1084                            p->lead_thread->tgid, p->pasid);
1085
1086                 mutex_lock(&p->mutex);
1087                 r = pqm_debugfs_mqds(m, &p->pqm);
1088                 mutex_unlock(&p->mutex);
1089
1090                 if (r)
1091                         break;
1092         }
1093
1094         srcu_read_unlock(&kfd_processes_srcu, idx);
1095
1096         return r;
1097 }
1098
1099 #endif