]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
crypto: akcipher - Drop sign/verify operations
[linux.git] / drivers / gpu / drm / amd / amdkfd / kfd_doorbell.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "kfd_priv.h"
24 #include <linux/mm.h>
25 #include <linux/mman.h>
26 #include <linux/slab.h>
27 #include <linux/io.h>
28 #include <linux/idr.h>
29
30 /*
31  * This extension supports kernel-level doorbell management for the
32  * kernel queues, using the first doorbell page reserved for the kernel.
33  */
34
35 /*
36  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
37  * receives 32-bit writes that are passed to queues as wptr values.
38  * The doorbells are intended to be written by applications as part
39  * of queueing work on user-mode queues.
40  * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
41  * We map the doorbell address space into user-mode when a process creates
42  * its first queue on each device.
43  * Although the mapping is done by KFD, it is equivalent to an mmap of
44  * the /dev/kfd with the particular device encoded in the mmap offset.
45  * There will be other uses for mmap of /dev/kfd, so only a range of
46  * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
47  */
48
49 /* # of doorbell bytes allocated for each process. */
50 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
51 {
52         if (!kfd->shared_resources.enable_mes)
53                 return roundup(kfd->device_info.doorbell_size *
54                                 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
55                                 PAGE_SIZE);
56         else
57                 return amdgpu_mes_doorbell_process_slice(
58                                         (struct amdgpu_device *)kfd->adev);
59 }
60
61 /* Doorbell calculations for device init. */
62 int kfd_doorbell_init(struct kfd_dev *kfd)
63 {
64         int size = PAGE_SIZE;
65         int r;
66
67         /*
68          * Todo: KFD kernel level operations need only one doorbell for
69          * ring test/HWS. So instead of reserving a whole page here for
70          * kernel, reserve and consume a doorbell from existing KGD kernel
71          * doorbell page.
72          */
73
74         /* Bitmap to dynamically allocate doorbells from kernel page */
75         kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
76         if (!kfd->doorbell_bitmap) {
77                 DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
78                 return -ENOMEM;
79         }
80
81         /* Alloc a doorbell page for KFD kernel usages */
82         r = amdgpu_bo_create_kernel(kfd->adev,
83                                     size,
84                                     PAGE_SIZE,
85                                     AMDGPU_GEM_DOMAIN_DOORBELL,
86                                     &kfd->doorbells,
87                                     NULL,
88                                     (void **)&kfd->doorbell_kernel_ptr);
89         if (r) {
90                 pr_err("failed to allocate kernel doorbells\n");
91                 bitmap_free(kfd->doorbell_bitmap);
92                 return r;
93         }
94
95         pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
96         return 0;
97 }
98
99 void kfd_doorbell_fini(struct kfd_dev *kfd)
100 {
101         bitmap_free(kfd->doorbell_bitmap);
102         amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
103                              (void **)&kfd->doorbell_kernel_ptr);
104 }
105
106 int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
107                       struct vm_area_struct *vma)
108 {
109         phys_addr_t address;
110         struct kfd_process_device *pdd;
111
112         /*
113          * For simplicitly we only allow mapping of the entire doorbell
114          * allocation of a single device & process.
115          */
116         if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd))
117                 return -EINVAL;
118
119         pdd = kfd_get_process_device_data(dev, process);
120         if (!pdd)
121                 return -EINVAL;
122
123         /* Calculate physical address of doorbell */
124         address = kfd_get_process_doorbells(pdd);
125         if (!address)
126                 return -ENOMEM;
127         vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
128                                 VM_DONTDUMP | VM_PFNMAP);
129
130         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
131
132         pr_debug("Mapping doorbell page\n"
133                  "     target user address == 0x%08llX\n"
134                  "     physical address    == 0x%08llX\n"
135                  "     vm_flags            == 0x%04lX\n"
136                  "     size                == 0x%04lX\n",
137                  (unsigned long long) vma->vm_start, address, vma->vm_flags,
138                  kfd_doorbell_process_slice(dev->kfd));
139
140
141         return io_remap_pfn_range(vma,
142                                 vma->vm_start,
143                                 address >> PAGE_SHIFT,
144                                 kfd_doorbell_process_slice(dev->kfd),
145                                 vma->vm_page_prot);
146 }
147
148
149 /* get kernel iomem pointer for a doorbell */
150 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
151                                         unsigned int *doorbell_off)
152 {
153         u32 inx;
154
155         mutex_lock(&kfd->doorbell_mutex);
156         inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
157
158         __set_bit(inx, kfd->doorbell_bitmap);
159         mutex_unlock(&kfd->doorbell_mutex);
160
161         if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
162                 return NULL;
163
164         *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
165                                                      kfd->doorbells,
166                                                      inx,
167                                                      kfd->device_info.doorbell_size);
168         inx *= 2;
169
170         pr_debug("Get kernel queue doorbell\n"
171                         "     doorbell offset   == 0x%08X\n"
172                         "     doorbell index    == 0x%x\n",
173                 *doorbell_off, inx);
174
175         return kfd->doorbell_kernel_ptr + inx;
176 }
177
178 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
179 {
180         unsigned int inx;
181
182         inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
183         inx /= 2;
184
185         mutex_lock(&kfd->doorbell_mutex);
186         __clear_bit(inx, kfd->doorbell_bitmap);
187         mutex_unlock(&kfd->doorbell_mutex);
188 }
189
190 void write_kernel_doorbell(void __iomem *db, u32 value)
191 {
192         if (db) {
193                 writel(value, db);
194                 pr_debug("Writing %d to doorbell address %p\n", value, db);
195         }
196 }
197
198 void write_kernel_doorbell64(void __iomem *db, u64 value)
199 {
200         if (db) {
201                 WARN(((unsigned long)db & 7) != 0,
202                      "Unaligned 64-bit doorbell");
203                 writeq(value, (u64 __iomem *)db);
204                 pr_debug("writing %llu to doorbell address %p\n", value, db);
205         }
206 }
207
208 static int init_doorbell_bitmap(struct qcm_process_device *qpd,
209                                 struct kfd_dev *dev)
210 {
211         unsigned int i;
212         int range_start = dev->shared_resources.non_cp_doorbells_start;
213         int range_end = dev->shared_resources.non_cp_doorbells_end;
214
215         if (!KFD_IS_SOC15(dev))
216                 return 0;
217
218         /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
219         pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
220         pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
221                         range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
222                         range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
223
224         for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
225                 if (i >= range_start && i <= range_end) {
226                         __set_bit(i, qpd->doorbell_bitmap);
227                         __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
228                                   qpd->doorbell_bitmap);
229                 }
230         }
231
232         return 0;
233 }
234
235 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
236 {
237         struct amdgpu_device *adev = pdd->dev->adev;
238         uint32_t first_db_index;
239
240         if (!pdd->qpd.proc_doorbells) {
241                 if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
242                         /* phys_addr_t 0 is error */
243                         return 0;
244         }
245
246         first_db_index = amdgpu_doorbell_index_on_bar(adev,
247                                                       pdd->qpd.proc_doorbells,
248                                                       0,
249                                                       pdd->dev->kfd->device_info.doorbell_size);
250         return adev->doorbell.base + first_db_index * sizeof(uint32_t);
251 }
252
253 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
254 {
255         int r;
256         struct qcm_process_device *qpd = &pdd->qpd;
257
258         /* Allocate bitmap for dynamic doorbell allocation */
259         qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
260                                              GFP_KERNEL);
261         if (!qpd->doorbell_bitmap) {
262                 DRM_ERROR("Failed to allocate process doorbell bitmap\n");
263                 return -ENOMEM;
264         }
265
266         r = init_doorbell_bitmap(&pdd->qpd, kfd);
267         if (r) {
268                 DRM_ERROR("Failed to initialize process doorbells\n");
269                 r = -ENOMEM;
270                 goto err;
271         }
272
273         /* Allocate doorbells for this process */
274         r = amdgpu_bo_create_kernel(kfd->adev,
275                                     kfd_doorbell_process_slice(kfd),
276                                     PAGE_SIZE,
277                                     AMDGPU_GEM_DOMAIN_DOORBELL,
278                                     &qpd->proc_doorbells,
279                                     NULL,
280                                     NULL);
281         if (r) {
282                 DRM_ERROR("Failed to allocate process doorbells\n");
283                 goto err;
284         }
285         return 0;
286
287 err:
288         bitmap_free(qpd->doorbell_bitmap);
289         qpd->doorbell_bitmap = NULL;
290         return r;
291 }
292
293 void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
294 {
295         struct qcm_process_device *qpd = &pdd->qpd;
296
297         if (qpd->doorbell_bitmap) {
298                 bitmap_free(qpd->doorbell_bitmap);
299                 qpd->doorbell_bitmap = NULL;
300         }
301
302         amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
303 }
This page took 0.050351 seconds and 4 git commands to generate.