/* drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h */
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #ifndef __AMDGPU_MES_H__
25 #define __AMDGPU_MES_H__
26
27 #include "amdgpu_irq.h"
28 #include "kgd_kfd_interface.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_doorbell.h"
31 #include <linux/sched/mm.h>
32
33 #define AMDGPU_MES_MAX_COMPUTE_PIPES        8
34 #define AMDGPU_MES_MAX_GFX_PIPES            2
35 #define AMDGPU_MES_MAX_SDMA_PIPES           2
36
37 #define AMDGPU_MES_API_VERSION_SHIFT    12
38 #define AMDGPU_MES_FEAT_VERSION_SHIFT   24
39
40 #define AMDGPU_MES_VERSION_MASK         0x00000fff
41 #define AMDGPU_MES_API_VERSION_MASK     0x00fff000
42 #define AMDGPU_MES_FEAT_VERSION_MASK    0xff000000
43
/*
 * Scheduling priority levels a gang can be assigned; also index into
 * amdgpu_mes::aggregated_doorbells (one doorbell per level).
 */
enum amdgpu_mes_priority_level {
        AMDGPU_MES_PRIORITY_LEVEL_LOW       = 0,
        AMDGPU_MES_PRIORITY_LEVEL_NORMAL    = 1,
        AMDGPU_MES_PRIORITY_LEVEL_MEDIUM    = 2,
        AMDGPU_MES_PRIORITY_LEVEL_HIGH      = 3,
        AMDGPU_MES_PRIORITY_LEVEL_REALTIME  = 4,
        AMDGPU_MES_PRIORITY_NUM_LEVELS      /* number of levels, keep last */
};
52
53 #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
54 #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
55
56 struct amdgpu_mes_funcs;
57
/*
 * MES hardware pipes: one for the scheduler, one for the KIQ.
 * Used as the index for the per-pipe arrays in struct amdgpu_mes.
 *
 * NOTE(review): "admgpu" is a long-standing typo for "amdgpu"; the name
 * is referenced by callers, so renaming would break the interface.
 */
enum admgpu_mes_pipe {
        AMDGPU_MES_SCHED_PIPE = 0,
        AMDGPU_MES_KIQ_PIPE,
        AMDGPU_MAX_MES_PIPES = 2,
};
63
/*
 * struct amdgpu_mes - per-device state for the MES (Micro Engine
 * Scheduler) firmware interface.
 *
 * Holds the loaded firmware objects, the submission ring, doorbell
 * bookkeeping, and the ID allocators that track the processes, gangs
 * and queues managed by MES.
 */
struct amdgpu_mes {
        struct amdgpu_device            *adev;

        /* Take only via amdgpu_mes_lock()/amdgpu_mes_unlock() (which
         * pair it with memalloc_noreclaim_save/restore) — hence
         * "hidden". See the lock-ordering comment above those helpers. */
        struct mutex                    mutex_hidden;

        /* ID allocators for the objects MES manages */
        struct idr                      pasid_idr;      /* pasid -> amdgpu_mes_process */
        struct idr                      gang_id_idr;    /* gang_id -> amdgpu_mes_gang */
        struct idr                      queue_id_idr;   /* queue_id -> amdgpu_mes_queue */
        struct ida                      doorbell_ida;

        /* NOTE(review): presumably guards queue_id_idr — confirm
         * against amdgpu_mes.c */
        spinlock_t                      queue_id_lock;

        /* firmware version words (see AMDGPU_MES_*_VERSION_MASK above) */
        uint32_t                        sched_version;
        uint32_t                        kiq_version;

        uint32_t                        total_max_queue;
        uint32_t                        doorbell_id_offset;
        uint32_t                        max_doorbell_slices;

        /* default time slices handed to new processes/gangs */
        uint64_t                        default_process_quantum;
        uint64_t                        default_gang_quantum;

        /* ring used to submit MES packets, and its submission lock */
        struct amdgpu_ring              ring;
        spinlock_t                      ring_lock;

        const struct firmware           *fw[AMDGPU_MAX_MES_PIPES];

        /* mes ucode */
        struct amdgpu_bo                *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
        uint64_t                        ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
        uint32_t                        *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
        uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];

        /* mes ucode data */
        struct amdgpu_bo                *data_fw_obj[AMDGPU_MAX_MES_PIPES];
        uint64_t                        data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
        uint32_t                        *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
        uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];

        /* eop gpu obj */
        struct amdgpu_bo                *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
        uint64_t                        eop_gpu_addr[AMDGPU_MAX_MES_PIPES];

        void                            *mqd_backup[AMDGPU_MAX_MES_PIPES];
        struct amdgpu_irq_src           irq[AMDGPU_MAX_MES_PIPES];

        /* resource masks reported to the MES firmware */
        uint32_t                        vmid_mask_gfxhub;
        uint32_t                        vmid_mask_mmhub;
        uint32_t                        compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
        uint32_t                        gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
        uint32_t                        sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
        /* one aggregated doorbell per priority level */
        uint32_t                        aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
        uint32_t                        sch_ctx_offs;
        uint64_t                        sch_ctx_gpu_addr;
        uint64_t                        *sch_ctx_ptr;
        uint32_t                        query_status_fence_offs;
        uint64_t                        query_status_fence_gpu_addr;
        uint64_t                        *query_status_fence_ptr;
        uint32_t                        read_val_offs;
        uint64_t                        read_val_gpu_addr;
        uint32_t                        *read_val_ptr;

        /* reclaim flags saved by amdgpu_mes_lock(), restored on unlock */
        uint32_t                        saved_flags;

        /* initialize kiq pipe */
        int                             (*kiq_hw_init)(struct amdgpu_device *adev);
        int                             (*kiq_hw_fini)(struct amdgpu_device *adev);

        /* MES doorbells */
        uint32_t                        db_start_dw_offset;
        uint32_t                        num_mes_dbs;
        unsigned long                   *doorbell_bitmap;

        /* ip specific functions */
        const struct amdgpu_mes_funcs   *funcs;
};
140
/*
 * struct amdgpu_mes_process - one process known to MES, looked up by
 * pasid in amdgpu_mes::pasid_idr.  Owns a list of gangs and a slice of
 * process doorbells.
 */
struct amdgpu_mes_process {
        int                     pasid;
        struct                  amdgpu_vm *vm;
        uint64_t                pd_gpu_addr;    /* page directory GPU address */
        /* process context buffer (AMDGPU_MES_PROC_CTX_SIZE) */
        struct amdgpu_bo        *proc_ctx_bo;
        uint64_t                proc_ctx_gpu_addr;
        void                    *proc_ctx_cpu_ptr;
        uint64_t                process_quantum;
        struct                  list_head gang_list;    /* amdgpu_mes_gang::list */
        uint32_t                doorbell_index;
        unsigned long           *doorbell_bitmap;
        struct mutex            doorbell_lock;  /* protects doorbell_bitmap */
};
154
/*
 * struct amdgpu_mes_gang - a group of queues scheduled together,
 * belonging to one process; linked into amdgpu_mes_process::gang_list.
 */
struct amdgpu_mes_gang {
        int                             gang_id;
        int                             priority;
        int                             inprocess_gang_priority;
        int                             global_priority_level;
        struct list_head                list;           /* node in process->gang_list */
        struct amdgpu_mes_process       *process;       /* owning process */
        /* gang context buffer (AMDGPU_MES_GANG_CTX_SIZE) */
        struct amdgpu_bo                *gang_ctx_bo;
        uint64_t                        gang_ctx_gpu_addr;
        void                            *gang_ctx_cpu_ptr;
        uint64_t                        gang_quantum;
        struct list_head                queue_list;     /* amdgpu_mes_queue::list */
};
168
/*
 * struct amdgpu_mes_queue - one hardware queue added to MES, owned by a
 * gang; linked into amdgpu_mes_gang::queue_list.
 */
struct amdgpu_mes_queue {
        struct list_head                list;           /* node in gang->queue_list */
        struct amdgpu_mes_gang          *gang;          /* owning gang */
        int                             queue_id;
        uint64_t                        doorbell_off;
        struct amdgpu_bo                *mqd_obj;       /* memory queue descriptor BO */
        void                            *mqd_cpu_ptr;
        uint64_t                        mqd_gpu_addr;
        uint64_t                        wptr_gpu_addr;  /* write pointer GPU address */
        int                             queue_type;
        int                             paging;
        struct amdgpu_ring              *ring;
};
182
/*
 * struct amdgpu_mes_queue_properties - caller-supplied parameters for
 * amdgpu_mes_add_hw_queue(); doorbell_off is filled in on success.
 */
struct amdgpu_mes_queue_properties {
        int                     queue_type;
        uint64_t                hqd_base_gpu_addr;
        uint64_t                rptr_gpu_addr;
        uint64_t                wptr_gpu_addr;
        uint64_t                wptr_mc_addr;
        uint32_t                queue_size;
        uint64_t                eop_gpu_addr;
        uint32_t                hqd_pipe_priority;
        uint32_t                hqd_queue_priority;
        bool                    paging;
        struct amdgpu_ring      *ring;
        /* out */
        uint64_t                doorbell_off;   /* assigned doorbell offset */
};
198
/*
 * struct amdgpu_mes_gang_properties - caller-supplied parameters for
 * amdgpu_mes_add_gang().
 */
struct amdgpu_mes_gang_properties {
        uint32_t        priority;
        uint32_t        gang_quantum;
        uint32_t        inprocess_gang_priority;
        uint32_t        priority_level;
        int             global_priority_level;
};
206
/*
 * struct mes_add_queue_input - parameters handed to the IP-specific
 * amdgpu_mes_funcs::add_hw_queue backend.  Mirrors the fields of the
 * firmware ADD_QUEUE API packet.
 */
struct mes_add_queue_input {
        uint32_t        process_id;
        uint64_t        page_table_base_addr;
        uint64_t        process_va_start;
        uint64_t        process_va_end;
        uint64_t        process_quantum;
        uint64_t        process_context_addr;
        uint64_t        gang_quantum;
        uint64_t        gang_context_addr;
        uint32_t        inprocess_gang_priority;
        uint32_t        gang_global_priority_level;
        uint32_t        doorbell_offset;
        uint64_t        mqd_addr;
        uint64_t        wptr_addr;
        uint64_t        wptr_mc_addr;
        uint32_t        queue_type;
        uint32_t        paging;
        uint32_t        gws_base;
        uint32_t        gws_size;
        uint64_t        tba_addr;       /* trap base address */
        uint64_t        tma_addr;       /* trap memory address */
        uint32_t        trap_en;
        uint32_t        skip_process_ctx_clear;
        uint32_t        is_kfd_process;
        uint32_t        is_aql_queue;
        uint32_t        queue_size;
        uint32_t        exclusively_scheduled;
};
235
/* Parameters for amdgpu_mes_funcs::remove_hw_queue. */
struct mes_remove_queue_input {
        uint32_t        doorbell_offset;
        uint64_t        gang_context_addr;
};
240
/* Parameters for amdgpu_mes_funcs::unmap_legacy_queue. */
struct mes_unmap_legacy_queue_input {
        enum amdgpu_unmap_queues_action    action;
        uint32_t                           queue_type;
        uint32_t                           doorbell_offset;
        uint32_t                           pipe_id;
        uint32_t                           queue_id;
        /* fence written after the trailing unmap completes */
        uint64_t                           trail_fence_addr;
        uint64_t                           trail_fence_data;
};
250
/*
 * Parameters for amdgpu_mes_funcs::suspend_gang; with suspend_all_gangs
 * set, gang_context_addr is not used and every gang is suspended.
 */
struct mes_suspend_gang_input {
        bool            suspend_all_gangs;
        uint64_t        gang_context_addr;
        uint64_t        suspend_fence_addr;
        uint32_t        suspend_fence_value;
};
257
/* Parameters for amdgpu_mes_funcs::resume_gang. */
struct mes_resume_gang_input {
        bool            resume_all_gangs;
        uint64_t        gang_context_addr;
};
262
/* Sub-operations of amdgpu_mes_funcs::misc_op; selects the active
 * member of the union in struct mes_misc_op_input. */
enum mes_misc_opcode {
        MES_MISC_OP_WRITE_REG,
        MES_MISC_OP_READ_REG,
        MES_MISC_OP_WRM_REG_WAIT,
        MES_MISC_OP_WRM_REG_WR_WAIT,
        MES_MISC_OP_SET_SHADER_DEBUGGER,
};
270
/*
 * struct mes_misc_op_input - tagged union of parameters for
 * amdgpu_mes_funcs::misc_op; @op selects which union member is valid.
 */
struct mes_misc_op_input {
        enum mes_misc_opcode op;

        union {
                /* MES_MISC_OP_READ_REG: read reg_offset into buffer_addr */
                struct {
                        uint32_t                  reg_offset;
                        uint64_t                  buffer_addr;
                } read_reg;

                /* MES_MISC_OP_WRITE_REG */
                struct {
                        uint32_t                  reg_offset;
                        uint32_t                  reg_value;
                } write_reg;

                /* MES_MISC_OP_WRM_REG_WAIT / MES_MISC_OP_WRM_REG_WR_WAIT */
                struct {
                        uint32_t                   ref;
                        uint32_t                   mask;
                        uint32_t                   reg0;
                        uint32_t                   reg1;
                } wrm_reg;

                /* MES_MISC_OP_SET_SHADER_DEBUGGER */
                struct {
                        uint64_t process_context_addr;
                        union {
                                /* NOTE(review): bitfields are declared
                                 * uint64_t but their widths sum to 32
                                 * and alias a uint32_t u32all — verify
                                 * the intended layout against the MES
                                 * firmware API definition. */
                                struct {
                                        uint64_t single_memop : 1;
                                        uint64_t single_alu_op : 1;
                                        uint64_t reserved: 30;
                                };
                                uint32_t u32all;
                        } flags;
                        uint32_t spi_gdbg_per_vmid_cntl;
                        uint32_t tcp_watch_cntl[4];
                        uint32_t trap_en;
                } set_shader_debugger;
        };
};
308
/*
 * struct amdgpu_mes_funcs - IP-specific backend operations
 * (amdgpu_mes::funcs).  Each op submits the corresponding firmware API
 * packet built from its *_input argument and returns 0 on success or a
 * negative errno.
 */
struct amdgpu_mes_funcs {
        int (*add_hw_queue)(struct amdgpu_mes *mes,
                            struct mes_add_queue_input *input);

        int (*remove_hw_queue)(struct amdgpu_mes *mes,
                               struct mes_remove_queue_input *input);

        int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
                                  struct mes_unmap_legacy_queue_input *input);

        int (*suspend_gang)(struct amdgpu_mes *mes,
                            struct mes_suspend_gang_input *input);

        int (*resume_gang)(struct amdgpu_mes *mes,
                           struct mes_resume_gang_input *input);

        int (*misc_op)(struct amdgpu_mes *mes,
                       struct mes_misc_op_input *input);
};
328
329 #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
330 #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
331
332 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
333
334 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
335 int amdgpu_mes_init(struct amdgpu_device *adev);
336 void amdgpu_mes_fini(struct amdgpu_device *adev);
337
338 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
339                               struct amdgpu_vm *vm);
340 void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
341
342 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
343                         struct amdgpu_mes_gang_properties *gprops,
344                         int *gang_id);
345 int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
346
347 int amdgpu_mes_suspend(struct amdgpu_device *adev);
348 int amdgpu_mes_resume(struct amdgpu_device *adev);
349
350 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
351                             struct amdgpu_mes_queue_properties *qprops,
352                             int *queue_id);
353 int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
354
355 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
356                                   struct amdgpu_ring *ring,
357                                   enum amdgpu_unmap_queues_action action,
358                                   u64 gpu_addr, u64 seq);
359
360 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
361 int amdgpu_mes_wreg(struct amdgpu_device *adev,
362                     uint32_t reg, uint32_t val);
363 int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
364                         uint32_t val, uint32_t mask);
365 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
366                                   uint32_t reg0, uint32_t reg1,
367                                   uint32_t ref, uint32_t mask);
368 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
369                                 uint64_t process_context_addr,
370                                 uint32_t spi_gdbg_per_vmid_cntl,
371                                 const uint32_t *tcp_watch_cntl,
372                                 uint32_t flags,
373                                 bool trap_en);
374
375 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
376                         int queue_type, int idx,
377                         struct amdgpu_mes_ctx_data *ctx_data,
378                         struct amdgpu_ring **out);
379 void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
380                             struct amdgpu_ring *ring);
381
382 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
383                                                    enum amdgpu_mes_priority_level prio);
384
385 int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
386                                    struct amdgpu_mes_ctx_data *ctx_data);
387 void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
388 int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
389                                  struct amdgpu_vm *vm,
390                                  struct amdgpu_mes_ctx_data *ctx_data);
391 int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
392                                    struct amdgpu_mes_ctx_data *ctx_data);
393
394 int amdgpu_mes_self_test(struct amdgpu_device *adev);
395
396 int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
397                                         unsigned int *doorbell_index);
398 void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
399                                         unsigned int doorbell_index);
400 unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
401                                         struct amdgpu_device *adev,
402                                         uint32_t doorbell_index,
403                                         unsigned int doorbell_id);
404 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
405
406 /*
407  * MES lock can be taken in MMU notifiers.
408  *
409  * A bit more detail about why to set no-FS reclaim with MES lock:
410  *
411  * The purpose of the MMU notifier is to stop GPU access to memory so
412  * that the Linux VM subsystem can move pages around safely. This is
413  * done by preempting user mode queues for the affected process. When
414  * MES is used, MES lock needs to be taken to preempt the queues.
415  *
416  * The MMU notifier callback entry point in the driver is
417  * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
418  * there is:
419  * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
420  * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
421  *
422  * The last part of the chain is a function pointer where we take the
423  * MES lock.
424  *
425  * The problem with taking locks in the MMU notifier is, that MMU
426  * notifiers can be called in reclaim-FS context. That's where the
427  * kernel frees up pages to make room for new page allocations under
428  * memory pressure. While we are running in reclaim-FS context, we must
429  * not trigger another memory reclaim operation because that would
430  * recursively reenter the reclaim code and cause a deadlock. The
 * memalloc_noreclaim_save/restore calls (which also cover the no-FS
 * case) guarantee that.
432  *
433  * In addition we also need to avoid lock dependencies on other locks taken
434  * under the MES lock, for example reservation locks. Here is a possible
435  * scenario of a deadlock:
436  * Thread A: takes and holds reservation lock | triggers reclaim-FS |
437  * MMU notifier | blocks trying to take MES lock
438  * Thread B: takes and holds MES lock | blocks trying to take reservation lock
439  *
440  * In this scenario Thread B gets involved in a deadlock even without
441  * triggering a reclaim-FS operation itself.
442  * To fix this and break the lock dependency chain you'd need to either:
443  * 1. protect reservation locks with memalloc_nofs_save/restore, or
444  * 2. avoid taking reservation locks under the MES lock.
445  *
446  * Reservation locks are taken all over the kernel in different subsystems, we
 * have no control over them and their lock dependencies. So the only workable
448  * solution is to avoid taking other locks under the MES lock.
449  * As a result, make sure no reclaim-FS happens while holding this lock anywhere
450  * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
451  */
/*
 * amdgpu_mes_lock - take the MES lock and enter no-reclaim context.
 *
 * Order matters: the mutex is taken first, then the current reclaim
 * flags are saved into mes->saved_flags so the matching
 * amdgpu_mes_unlock() can restore them.  See the block comment above
 * for why reclaim must be disabled while this lock is held.
 */
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
        mutex_lock(&mes->mutex_hidden);
        mes->saved_flags = memalloc_noreclaim_save();
}
457
/*
 * amdgpu_mes_unlock - leave no-reclaim context and drop the MES lock.
 *
 * Mirror of amdgpu_mes_lock(): restore the reclaim flags saved there
 * before releasing the mutex.
 */
static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
        memalloc_noreclaim_restore(mes->saved_flags);
        mutex_unlock(&mes->mutex_hidden);
}
463 #endif /* __AMDGPU_MES_H__ */
/* end of amdgpu_mes.h */