1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
25 #include <linux/mman.h>
26 #include <linux/slab.h>
28 #include <linux/idr.h>
/*
 * This extension supports kernel-level doorbell management for the
 * kernel queues using the first doorbell page reserved for the kernel.
 */
/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
 * receives 32-bit writes that are passed to queues as wptr values.
 * The doorbells are intended to be written by applications as part
 * of queueing work on user-mode queues.
 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
 * We map the doorbell address space into user-mode when a process creates
 * its first queue on each device.
 * Although the mapping is done by KFD, it is equivalent to an mmap of
 * /dev/kfd with the particular device encoded in the mmap offset.
 * There will be other uses for mmap of /dev/kfd, so only a range of
 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
 */
49 /* # of doorbell bytes allocated for each process. */
50 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
52 if (!kfd->shared_resources.enable_mes)
53 return roundup(kfd->device_info.doorbell_size *
54 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
57 return amdgpu_mes_doorbell_process_slice(
58 (struct amdgpu_device *)kfd->adev);
61 /* Doorbell calculations for device init. */
62 int kfd_doorbell_init(struct kfd_dev *kfd)
64 size_t doorbell_start_offset;
65 size_t doorbell_aperture_size;
66 size_t doorbell_process_limit;
69 * With MES enabled, just set the doorbell base as it is needed
70 * to calculate doorbell physical address.
72 if (kfd->shared_resources.enable_mes) {
74 kfd->shared_resources.doorbell_physical_address;
79 * We start with calculations in bytes because the input data might
80 * only be byte-aligned.
81 * Only after we have done the rounding can we assume any alignment.
84 doorbell_start_offset =
85 roundup(kfd->shared_resources.doorbell_start_offset,
86 kfd_doorbell_process_slice(kfd));
88 doorbell_aperture_size =
89 rounddown(kfd->shared_resources.doorbell_aperture_size,
90 kfd_doorbell_process_slice(kfd));
92 if (doorbell_aperture_size > doorbell_start_offset)
93 doorbell_process_limit =
94 (doorbell_aperture_size - doorbell_start_offset) /
95 kfd_doorbell_process_slice(kfd);
99 if (!kfd->max_doorbell_slices ||
100 doorbell_process_limit < kfd->max_doorbell_slices)
101 kfd->max_doorbell_slices = doorbell_process_limit;
103 kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
104 doorbell_start_offset;
106 kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
108 kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
109 kfd_doorbell_process_slice(kfd));
111 if (!kfd->doorbell_kernel_ptr)
114 pr_debug("Doorbell initialization:\n");
115 pr_debug("doorbell base == 0x%08lX\n",
116 (uintptr_t)kfd->doorbell_base);
118 pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
119 kfd->doorbell_base_dw_offset);
121 pr_debug("doorbell_process_limit == 0x%08lX\n",
122 doorbell_process_limit);
124 pr_debug("doorbell_kernel_offset == 0x%08lX\n",
125 (uintptr_t)kfd->doorbell_base);
127 pr_debug("doorbell aperture size == 0x%08lX\n",
128 kfd->shared_resources.doorbell_aperture_size);
130 pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
135 void kfd_doorbell_fini(struct kfd_dev *kfd)
137 if (kfd->doorbell_kernel_ptr)
138 iounmap(kfd->doorbell_kernel_ptr);
141 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
142 struct vm_area_struct *vma)
145 struct kfd_process_device *pdd;
148 * For simplicitly we only allow mapping of the entire doorbell
149 * allocation of a single device & process.
151 if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
154 pdd = kfd_get_process_device_data(dev, process);
158 /* Calculate physical address of doorbell */
159 address = kfd_get_process_doorbells(pdd);
160 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
161 VM_DONTDUMP | VM_PFNMAP;
163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
165 pr_debug("Mapping doorbell page\n"
166 " target user address == 0x%08llX\n"
167 " physical address == 0x%08llX\n"
168 " vm_flags == 0x%04lX\n"
169 " size == 0x%04lX\n",
170 (unsigned long long) vma->vm_start, address, vma->vm_flags,
171 kfd_doorbell_process_slice(dev));
174 return io_remap_pfn_range(vma,
176 address >> PAGE_SHIFT,
177 kfd_doorbell_process_slice(dev),
182 /* get kernel iomem pointer for a doorbell */
183 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
184 unsigned int *doorbell_off)
188 mutex_lock(&kfd->doorbell_mutex);
189 inx = find_first_zero_bit(kfd->doorbell_available_index,
190 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
192 __set_bit(inx, kfd->doorbell_available_index);
193 mutex_unlock(&kfd->doorbell_mutex);
195 if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
198 inx *= kfd->device_info.doorbell_size / sizeof(u32);
201 * Calculating the kernel doorbell offset using the first
204 *doorbell_off = kfd->doorbell_base_dw_offset + inx;
206 pr_debug("Get kernel queue doorbell\n"
207 " doorbell offset == 0x%08X\n"
208 " doorbell index == 0x%x\n",
211 return kfd->doorbell_kernel_ptr + inx;
214 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
218 inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
219 * sizeof(u32) / kfd->device_info.doorbell_size;
221 mutex_lock(&kfd->doorbell_mutex);
222 __clear_bit(inx, kfd->doorbell_available_index);
223 mutex_unlock(&kfd->doorbell_mutex);
226 void write_kernel_doorbell(void __iomem *db, u32 value)
230 pr_debug("Writing %d to doorbell address %p\n", value, db);
234 void write_kernel_doorbell64(void __iomem *db, u64 value)
237 WARN(((unsigned long)db & 7) != 0,
238 "Unaligned 64-bit doorbell");
239 writeq(value, (u64 __iomem *)db);
240 pr_debug("writing %llu to doorbell address %p\n", value, db);
244 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
245 struct kfd_process_device *pdd,
246 unsigned int doorbell_id)
249 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
250 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
251 * the process's doorbells. The offset returned is in dword
252 * units regardless of the ASIC-dependent doorbell size.
254 if (!kfd->shared_resources.enable_mes)
255 return kfd->doorbell_base_dw_offset +
257 * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
259 kfd->device_info.doorbell_size / sizeof(u32);
261 return amdgpu_mes_get_doorbell_dw_offset_in_bar(
262 (struct amdgpu_device *)kfd->adev,
263 pdd->doorbell_index, doorbell_id);
266 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
268 uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
269 kfd->shared_resources.doorbell_start_offset) /
270 kfd_doorbell_process_slice(kfd) + 1;
276 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
278 return pdd->dev->doorbell_base +
279 pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
282 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
286 if (!kfd->shared_resources.enable_mes)
287 r = ida_simple_get(&kfd->doorbell_ida, 1,
288 kfd->max_doorbell_slices, GFP_KERNEL);
290 r = amdgpu_mes_alloc_process_doorbells(
291 (struct amdgpu_device *)kfd->adev,
300 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
302 if (doorbell_index) {
303 if (!kfd->shared_resources.enable_mes)
304 ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
306 amdgpu_mes_free_process_doorbells(
307 (struct amdgpu_device *)kfd->adev,