/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_pcie.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate the worst-case amount of memory to reserve for page tables.
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

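	/*
	 * Note: si.freeram - si.freehigh counts free low memory only;
	 * on configs with highmem (32-bit), high pages are excluded.
	 * The result is scaled by si.mem_unit to get bytes.
	 */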
	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                set up amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
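		/*
		 * compute_vmid_bitmap marks the VMIDs handed to the KFD.
		 * Illustrative example (values depend on the ASIC): with
		 * AMDGPU_NUM_VMID = 16 and first_kfd_vmid = 8,
		 * ((1 << 16) - 1) - ((1 << 8) - 1) = 0xffff - 0xff = 0xff00,
		 * i.e. VMIDs 8..15 belong to compute.
		 */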
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile-time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc.
		 * In order to route a doorbell to the CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

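/*
 * amdgpu_amdkfd_alloc_gtt_mem - Allocate, pin and kmap a GTT buffer for
 * the KFD (e.g. MQDs; cp_mqd_gfx9 sets the GFX9 CP MQD placement flag).
 * Returns the BO handle in @mem_obj and its GPU/CPU addresses in
 * @gpu_addr and @cpu_ptr. On error the BO is unwound and freed.
 */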
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
}

int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
				void **mem_obj)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_user *ubo;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create_user(adev, &bp, &ubo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	bo = &ubo->bo;
	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
				      enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}
}

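/*
 * Local memory is reported in two parts: "public" is the CPU-visible
 * aperture (visible_vram_size), "private" is the remaining VRAM.
 */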
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
				      struct kfd_local_mem_info *mem_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	memset(mem_info, 0, sizeof(*mem_info));

	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;

	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev)) {
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	} else if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else {
		mem_info->mem_clk_max = 100;
	}
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* the sclk is in quanta of 10kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

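/*
 * amdgpu_amdkfd_get_dmabuf_info - Look up the amdgpu BO behind a dma-buf fd
 * and report its owning device, size, metadata and equivalent KFD alloc
 * flags. Buffers from other drivers, or outside VRAM/GTT, are rejected
 * with -EINVAL.
 */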
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
				  struct kgd_dev **dma_buf_kgd,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dma_buf_kgd)
		*dma_buf_kgd = (struct kgd_dev *)adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

	return amdgpu_vram_mgr_usage(vram_man);
}

uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.xgmi.hive_id;
}

uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->unique_id;
}

uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}

int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev;
	int num_links;

	if (adev->asic_type != CHIP_ALDEBARAN)
		return 0;

	if (src)
		peer_adev = (struct amdgpu_device *)src;

	/* num_links returns 0 for indirect peers since the indirect route is unknown. */
	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
	if (num_links < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, num_links);
		num_links = 0;
	}

	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
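	/* 16 lanes * 25000 Mbit/s per lane / 8 bits per byte = 50000 MB/s per link. */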
	return (num_links * 16 * 25000) / BITS_PER_BYTE;
}

int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dev;
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
							fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

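	/* lanes * per-lane raw rate in Mbit/s, divided by 8 to get MB/s;
	 * PCIe encoding overhead is not accounted for here.
	 */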
	return (num_lanes_factor * gen_speed_mbits_factor) / BITS_PER_BYTE;
}

uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rmmio_remap.bus_addr;
}

uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gds.gws_size;
}

uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rev_id;
}

int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.noretry;
}

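/*
 * amdgpu_amdkfd_submit_ib - Synchronously submit a single IB on the
 * requested engine and wait for its fence. Only meaningful without HWS,
 * where the caller supplies the VMID directly (see the TODO below).
 */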
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

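/*
 * On AMDGPU_FAMILY_AI (Vega-generation) parts, KFD mappings can be
 * reached through more than one VM hub, so every hub is flushed;
 * otherwise flushing GFXHUB 0 is sufficient.
 */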
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}

int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid,
				      enum TLB_FLUSH_TYPE flush_type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}

bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->have_atomics_support;
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct ras_err_data err_data = {0, 0, 0, NULL};

	/* CPU MCA will handle page retirement if connected_to_cpu is 1 */
	if (!adev->gmc.xgmi.connected_to_cpu)
		amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL);
	else
		amdgpu_amdkfd_gpu_reset(kgd);
}