/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"
/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);
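
/* Lookups in kfd_processes_table are done under kfd_processes_srcu;
 * insertion and removal take kfd_processes_mutex, and removal calls
 * synchronize_srcu() before the entry can go away.
 */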
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;

/* Ordered, single-threaded workqueue for restoring evicted
 * processes. Restoring multiple processes concurrently under memory
 * pressure can lead to processes blocking each other from validating
 * their BOs and result in a live-lock situation where processes
 * remain evicted indefinitely.
 */
static struct workqueue_struct *kfd_restore_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
struct kfd_procfs_tree {
	struct kobject *kobj;
};

static struct kfd_procfs_tree procfs;

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
	int val = 0;

	if (strcmp(attr->name, "pasid") == 0) {
		struct kfd_process *p = container_of(attr, struct kfd_process,
						     attr_pasid);
		val = p->pasid;
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
	}

	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
}

static void kfd_procfs_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}

static const struct sysfs_ops kfd_procfs_ops = {
	.show = kfd_procfs_show,
};

static struct kobj_type procfs_type = {
	.release = kfd_procfs_kobj_release,
	.sysfs_ops = &kfd_procfs_ops,
};
void kfd_procfs_init(void)
{
	int ret = 0;

	procfs.kobj = kfd_alloc_struct(procfs.kobj);
	if (!procfs.kobj)
		return;

	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
				   &kfd_device->kobj, "proc");
	if (ret) {
		pr_warn("Could not create procfs proc folder");
		/* If we fail to create the procfs, clean up */
		kfd_procfs_shutdown();
	}
}

void kfd_procfs_shutdown(void)
{
	if (procfs.kobj) {
		kobject_del(procfs.kobj);
		kobject_put(procfs.kobj);
		procfs.kobj = NULL;
	}
}
static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
				     struct attribute *attr, char *buffer)
{
	struct queue *q = container_of(kobj, struct queue, kobj);

	if (!strcmp(attr->name, "size"))
		return snprintf(buffer, PAGE_SIZE, "%llu",
				q->properties.queue_size);
	else if (!strcmp(attr->name, "type"))
		return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
	else if (!strcmp(attr->name, "gpuid"))
		return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
	else
		pr_err("Invalid attribute");

	return 0;
}

static struct attribute attr_queue_size = {
	.name = "size",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_type = {
	.name = "type",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute attr_queue_gpuid = {
	.name = "gpuid",
	.mode = KFD_SYSFS_FILE_MODE
};

static struct attribute *procfs_queue_attrs[] = {
	&attr_queue_size,
	&attr_queue_type,
	&attr_queue_gpuid,
	NULL
};

static const struct sysfs_ops procfs_queue_ops = {
	.show = kfd_procfs_queue_show,
};

static struct kobj_type procfs_queue_type = {
	.sysfs_ops = &procfs_queue_ops,
	.default_attrs = procfs_queue_attrs,
};
int kfd_procfs_add_queue(struct queue *q)
{
	struct kfd_process *proc;
	int ret;

	if (!q || !q->process)
		return -EINVAL;
	proc = q->process;

	/* Create proc/<pid>/queues/<queue id> folder */
	if (!proc->kobj_queues)
		return -EFAULT;
	ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
				   proc->kobj_queues, "%u",
				   q->properties.queue_id);
	if (ret < 0) {
		pr_warn("Creating proc/<pid>/queues/%u failed",
			q->properties.queue_id);
		kobject_put(&q->kobj);
		return ret;
	}

	return 0;
}

void kfd_procfs_del_queue(struct queue *q)
{
	if (!q)
		return;

	kobject_del(&q->kobj);
	kobject_put(&q->kobj);
}
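
/* Allocate the workqueues used for deferred process teardown
 * (kfd_process_wq) and for restoring evicted processes
 * (kfd_restore_wq). Safe to call repeatedly: existing queues are
 * reused.
 */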
int kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);

	if (!kfd_process_wq || !kfd_restore_wq) {
		kfd_process_destroy_wq();
		return -ENOMEM;
	}

	return 0;
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
	if (kfd_restore_wq) {
		destroy_workqueue(kfd_restore_wq);
		kfd_restore_wq = NULL;
	}
}
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
				   struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}

/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
 *	This function should be called only right after the process
 *	is created and while kfd_processes_mutex is still being held,
 *	to avoid concurrency. Because of that exclusiveness, we do
 *	not need to take p->mutex.
 */
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
				   uint64_t gpu_va, uint32_t size,
				   uint32_t flags, void **kptr)
{
	struct kfd_dev *kdev = pdd->dev;
	struct kgd_mem *mem = NULL;
	int handle;
	int err;

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
						      pdd->vm, &mem, NULL,
						      flags);
	if (err)
		goto err_alloc_mem;

	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
	if (err)
		goto err_map_mem;

	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Create an obj handle so kfd_process_device_remove_obj_handle
	 * will take care of the bo removal when the process finishes.
	 * We do not need to take p->mutex, because the process is just
	 * created and the ioctls have not had the chance to run.
	 */
	handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (handle < 0) {
		err = handle;
		goto free_gpuvm;
	}

	if (kptr) {
		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
				(struct kgd_mem *)mem, kptr, NULL);
		if (err) {
			pr_debug("Map GTT BO to kernel failed\n");
			goto free_obj_handle;
		}
	}

	return err;

free_obj_handle:
	kfd_process_device_remove_obj_handle(pdd, handle);
free_gpuvm:
sync_memory_failed:
	kfd_process_free_gpuvm(mem, pdd);
	return err;

err_map_mem:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
	*kptr = NULL;
	return err;
}
/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
 *	process for IB usage. The memory reserved is for KFD to submit
 *	IBs to AMDGPU from the kernel. If the memory is reserved
 *	successfully, ib_kaddr will have the CPU/kernel
 *	address. Check ib_kaddr before accessing the memory.
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (qpd->ib_kaddr || !qpd->ib_base)
		return 0;

	/* ib_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
				      &kaddr);
	if (ret)
		return ret;

	qpd->ib_kaddr = kaddr;

	return 0;
}
struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;
	int ret;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * take kfd processes mutex before starting of process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process) {
		pr_debug("Process already found\n");
	} else {
		process = create_process(thread);
		if (IS_ERR(process))
			goto out;

		ret = kfd_process_init_cwsr_apu(process, filep);
		if (ret) {
			process = ERR_PTR(ret);
			goto out;
		}

		if (!procfs.kobj)
			goto out;

		process->kobj = kfd_alloc_struct(process->kobj);
		if (!process->kobj) {
			pr_warn("Creating procfs kobject failed");
			goto out;
		}
		ret = kobject_init_and_add(process->kobj, &procfs_type,
					   procfs.kobj, "%d",
					   (int)process->lead_thread->pid);
		if (ret) {
			pr_warn("Creating procfs pid directory failed");
			goto out;
		}

		process->attr_pasid.name = "pasid";
		process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&process->attr_pasid);
		ret = sysfs_create_file(process->kobj, &process->attr_pasid);
		if (ret)
			pr_warn("Creating pasid for pid %d failed",
				(int)process->lead_thread->pid);

		process->kobj_queues = kobject_create_and_add("queues",
							      process->kobj);
		if (!process->kobj_queues)
			pr_warn("Creating KFD proc/queues folder failed");
	}
out:
	if (!IS_ERR(process))
		kref_get(&process->ref);
	mutex_unlock(&kfd_processes_mutex);

	return process;
}
struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);
	if (!process)
		return ERR_PTR(-EINVAL);

	return process;
}
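
/* Look up a kfd_process by its mm_struct. Callers must hold the
 * kfd_processes_srcu read lock, as find_process() and
 * kfd_lookup_process_by_mm() do.
 */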
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
				   kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}
static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
{
	struct kfd_process *p = pdd->process;
	void *mem;
	int id;

	/*
	 * Remove all handles from idr and release appropriate
	 * local memory object
	 */
	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
		struct kfd_process_device *peer_pdd;

		list_for_each_entry(peer_pdd, &p->per_device_data,
				    per_device_list) {
			if (!peer_pdd->vm)
				continue;
			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
				peer_pdd->dev->kgd, mem, peer_pdd->vm);
		}

		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
		kfd_process_device_remove_obj_handle(pdd, id);
	}
}

static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_device_free_bos(pdd);
}
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
			 pdd->dev->id, p->pasid);

		if (pdd->drm_file) {
			amdgpu_amdkfd_gpuvm_release_process_vm(
					pdd->dev->kgd, pdd->vm);
			fput(pdd->drm_file);
		} else if (pdd->vm)
			amdgpu_amdkfd_gpuvm_destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				   get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd->qpd.doorbell_bitmap);
		idr_destroy(&pdd->alloc_idr);

		/*
		 * before destroying pdd, make sure to report availability
		 * for auto suspend
		 */
		if (pdd->runtime_inuse) {
			pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
			pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
			pdd->runtime_inuse = false;
		}

		kfree(pdd);
	}
}
/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	/* Remove the procfs files */
	if (p->kobj) {
		sysfs_remove_file(p->kobj, &p->attr_pasid);
		kobject_del(p->kobj_queues);
		kobject_put(p->kobj_queues);
		p->kobj_queues = NULL;
		kobject_del(p->kobj);
		kobject_put(p->kobj);
		p->kobj = NULL;
	}

	kfd_iommu_unbind_process(p);

	kfd_process_free_outstanding_kfd_bos(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}
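
/* Final kref release callback. The actual teardown sleeps (sysfs
 * removal, mutexes, BO frees), so it is deferred to kfd_process_wq
 * rather than running in the caller's context.
 */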
static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					 struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and if the
	 * pdd is in debug mode, we should first force unregistration,
	 * then we will be able to destroy the queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	/* Signal the eviction fence after user mode queues are
	 * destroyed. This allows any BOs to be freed without
	 * triggering pointless evictions or waiting for fences.
	 */
	dma_fence_signal(p->ef);

	mutex_unlock(&p->mutex);

	mmu_notifier_put(&p->mmu_notifier);
}
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
	.free_notifier = kfd_process_free_notifier,
};
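
/* On APUs the CWSR trap handler area lives in system memory that the
 * process maps through the KFD mmap interface; the trap handler ISA
 * is copied into that mapping and the TBA/TMA addresses are derived
 * from it.
 */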
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;
		struct qcm_process_device *qpd = &pdd->qpd;

		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
			continue;

		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}
static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
	void *kaddr;
	int ret;

	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
		return 0;

	/* cwsr_base is only set for dGPU */
	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
	if (ret)
		return ret;

	qpd->cwsr_kaddr = kaddr;
	qpd->tba_addr = qpd->cwsr_base;

	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);

	return 0;
}
/*
 * On return the kfd_process is fully operational and will be freed when the
 * mm is released.
 */
static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;
	process->lead_thread = thread->group_leader;
	INIT_LIST_HEAD(&process->per_device_data);
	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();
	kfd_event_init_process(process);
	process->is_32bit_user_mode = in_compat_syscall();

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures*/
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	/* Must be last, have to use release destruction after this */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_register_notifier;

	get_task_struct(process->lead_thread);
	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
		     (uintptr_t)process->mm);

	return process;

err_register_notifier:
	kfd_process_free_outstanding_kfd_bos(process);
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	mutex_destroy(&process->mutex);
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
				struct kfd_dev *dev)
{
	unsigned int i;
	int range_start = dev->shared_resources.non_cp_doorbells_start;
	int range_end = dev->shared_resources.non_cp_doorbells_end;

	if (!KFD_IS_SOC15(dev->device_info->asic_family))
		return 0;

	qpd->doorbell_bitmap =
		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
				     BITS_PER_BYTE), GFP_KERNEL);
	if (!qpd->doorbell_bitmap)
		return -ENOMEM;

	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
		 range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
		 range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
		if (i >= range_start && i <= range_end) {
			set_bit(i, qpd->doorbell_bitmap);
			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
				qpd->doorbell_bitmap);
		}
	}

	return 0;
}
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
		pr_err("Failed to init doorbell for process\n");
		kfree(pdd);
		return NULL;
	}

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	pdd->runtime_inuse = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */
	idr_init(&pdd->alloc_idr);

	return pdd;
}
/**
 * kfd_process_device_init_vm - Initialize a VM for a process-device
 *
 * @pdd: The process-device
 * @drm_file: Optional pointer to a DRM file descriptor
 *
 * If @drm_file is specified, it will be used to acquire the VM from
 * that file descriptor. If successful, the @pdd takes ownership of
 * the file descriptor.
 *
 * If @drm_file is NULL, a new VM is created.
 *
 * Returns 0 on success, -errno on failure.
 */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file)
{
	struct kfd_process *p;
	struct kfd_dev *dev;
	int ret;

	if (pdd->vm)
		return drm_file ? -EBUSY : 0;

	p = pdd->process;
	dev = pdd->dev;

	if (drm_file)
		ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
			dev->kgd, drm_file, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	else
		ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
			&pdd->vm, &p->kgd_process_info, &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		return ret;
	}

	amdgpu_vm_set_task_info(pdd->vm);

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;

	pdd->drm_file = drm_file;

	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
	pdd->vm = NULL;

	return ret;
}
/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						      struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * signal runtime-pm system to auto resume and prevent
	 * further runtime suspend once device pdd is created until
	 * pdd is destroyed.
	 */
	if (!pdd->runtime_inuse) {
		err = pm_runtime_get_sync(dev->ddev->dev);
		if (err < 0)
			return ERR_PTR(err);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		goto out;

	err = kfd_process_device_init_vm(pdd, NULL);
	if (err)
		goto out;

	/*
	 * make sure that runtime_usage counter is incremented just once
	 * per pdd
	 */
	pdd->runtime_inuse = true;

	return pdd;

out:
	/* balance runpm reference count and exit with error */
	if (!pdd->runtime_inuse) {
		pm_runtime_mark_last_busy(dev->ddev->dev);
		pm_runtime_put_autosuspend(dev->ddev->dev);
	}

	return ERR_PTR(err);
}
struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* Create specific handle mapped to mem from process local memory idr
 * Assumes that the process lock is held.
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					 void *mem)
{
	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
}

/* Translate specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle < 0)
		return NULL;

	return idr_find(&pdd->alloc_idr, handle);
}

/* Remove specific handle from process local memory idr
 * Assumes that the process lock is held.
 */
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					  int handle)
{
	if (handle >= 0)
		idr_remove(&pdd->alloc_idr, handle);
}
/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}
/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
int kfd_process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* kfd_process_restore_queues - Restore all user queues of a process */
int kfd_process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}
static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between restore and evict work items
	 * is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, it is possible to be evicted again. But
	 * restore has a few more steps to finish, so wait for any
	 * previous restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
	ret = kfd_process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		queue_delayed_work(kfd_restore_wq, &p->restore_work,
				   msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
}
static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);
	pr_debug("Started restoring pasid 0x%x\n", p->pasid);

	/* Setting last_restore_timestamp before successful restoration.
	 * Otherwise this would have to be set by KGD (restore_process_bos)
	 * before KFD BOs are unreserved. If not, the process can be evicted
	 * again before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions)
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
						      &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = kfd_process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (kfd_process_evict_queues(p))
			pr_err("Failed to suspend process 0x%x\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}
int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
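
/* Flush GPU TLB entries for this process on the given device. Without
 * HWS the flush targets the VMID assigned to the process (nothing to
 * flush before the first queue is created); with HWS it is done by
 * PASID.
 */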
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
							 pdd->qpd.vmid);
	} else {
		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
						  pdd->process->pasid);
	}
}
#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID 0x%x:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif