drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/ratelimit.h>
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/list.h>
28 #include <linux/types.h>
29 #include <linux/bitops.h>
30 #include <linux/sched.h>
31 #include "kfd_priv.h"
32 #include "kfd_device_queue_manager.h"
33 #include "kfd_mqd_manager.h"
34 #include "cik_regs.h"
35 #include "kfd_kernel_queue.h"
36 #include "amdgpu_amdkfd.h"
37
38 /* Size of the per-pipe EOP queue */
39 #define CIK_HPD_EOP_BYTES_LOG2 11
40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
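/* i.e. 1 << 11 = 2048 bytes of EOP buffer reserved per pipe */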
41
42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
43                                         unsigned int pasid, unsigned int vmid);
44
45 static int execute_queues_cpsch(struct device_queue_manager *dqm,
46                                 enum kfd_unmap_queues_filter filter,
47                                 uint32_t filter_param);
48 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
49                                 enum kfd_unmap_queues_filter filter,
50                                 uint32_t filter_param);
51
52 static int map_queues_cpsch(struct device_queue_manager *dqm);
53
54 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
55                                 struct queue *q);
56
57 static inline void deallocate_hqd(struct device_queue_manager *dqm,
58                                 struct queue *q);
59 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
60 static int allocate_sdma_queue(struct device_queue_manager *dqm,
61                                 struct queue *q);
62 static void kfd_process_hw_exception(struct work_struct *work);
63
64 static inline
65 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
66 {
67         if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
68                 return KFD_MQD_TYPE_SDMA;
69         return KFD_MQD_TYPE_CP;
70 }
71
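/* A pipe is considered usable by KFD if at least one of its queues is set
 * in the shared cp_queue_bitmap.
 */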
72 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
73 {
74         int i;
75         int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
76                 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
77
78         /* queue is available for KFD usage if bit is 1 */
79         for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
80                 if (test_bit(pipe_offset + i,
81                               dqm->dev->shared_resources.cp_queue_bitmap))
82                         return true;
83         return false;
84 }
85
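/* Number of CP queue slots reserved for KFD, i.e. the set bits in the
 * shared cp_queue_bitmap.
 */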
86 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
87 {
88         return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
89                                 KGD_MAX_QUEUES);
90 }
91
92 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
93 {
94         return dqm->dev->shared_resources.num_queue_per_pipe;
95 }
96
97 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
98 {
99         return dqm->dev->shared_resources.num_pipe_per_mec;
100 }
101
102 static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
103 {
104         return dqm->dev->device_info->num_sdma_engines;
105 }
106
107 static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
108 {
109         return dqm->dev->device_info->num_xgmi_sdma_engines;
110 }
111
112 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
113 {
114         return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
115 }
116
117 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
118 {
119         return dqm->dev->device_info->num_sdma_engines
120                         * dqm->dev->device_info->num_sdma_queues_per_engine;
121 }
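/* e.g. 2 SDMA engines with 8 queues per engine (illustrative per-ASIC
 * device_info values) gives 16 queues from the helper above.
 */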
122
123 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
124 {
125         return dqm->dev->device_info->num_xgmi_sdma_engines
126                         * dqm->dev->device_info->num_sdma_queues_per_engine;
127 }
128
129 void program_sh_mem_settings(struct device_queue_manager *dqm,
130                                         struct qcm_process_device *qpd)
131 {
132         return dqm->dev->kfd2kgd->program_sh_mem_settings(
133                                                 dqm->dev->kgd, qpd->vmid,
134                                                 qpd->sh_mem_config,
135                                                 qpd->sh_mem_ape1_base,
136                                                 qpd->sh_mem_ape1_limit,
137                                                 qpd->sh_mem_bases);
138 }
139
140 static void increment_queue_count(struct device_queue_manager *dqm,
141                         enum kfd_queue_type type)
142 {
143         dqm->active_queue_count++;
144         if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
145                 dqm->active_cp_queue_count++;
146 }
147
148 static void decrement_queue_count(struct device_queue_manager *dqm,
149                         enum kfd_queue_type type)
150 {
151         dqm->active_queue_count--;
152         if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
153                 dqm->active_cp_queue_count--;
154 }
155
156 int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
157 {
158         int ret;
159         uint64_t tmp = 0;
160
161         if (!val)
162                 return -EINVAL;
163         /*
164          * SDMA activity counter is stored at queue's RPTR + 0x8 location.
165          */
166         if (!access_ok((const void __user *)(q_rptr +
167                                         sizeof(uint64_t)), sizeof(uint64_t))) {
168                 pr_err("Can't access sdma queue activity counter\n");
169                 return -EFAULT;
170         }
171
172         ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
173         if (!ret) {
174                 *val = tmp;
175         }
176
177         return ret;
178 }
179
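/* Doorbell assignment, as implemented below: pre-SOC15 ASICs reuse the
 * queue ID to preserve the user-mode ABI, SOC15 SDMA queues get a static
 * slot derived from the engine and queue id, and SOC15 CP queues take the
 * first free bit in the process doorbell bitmap.
 */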
180 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
181 {
182         struct kfd_dev *dev = qpd->dqm->dev;
183
184         if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
185                 /* On pre-SOC15 chips we need to use the queue ID to
186                  * preserve the user mode ABI.
187                  */
188                 q->doorbell_id = q->properties.queue_id;
189         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
190                         q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
191                 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
192                  * doorbell assignments based on the engine and queue id.
193                  * The doorbell index distance between RLC (2*i) and (2*i+1)
194                  * for an SDMA engine is 512.
195                  */
196                 uint32_t *idx_offset =
197                                 dev->shared_resources.sdma_doorbell_idx;
198
199                 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
200                         + (q->properties.sdma_queue_id & 1)
201                         * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
202                         + (q->properties.sdma_queue_id >> 1);
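                /* e.g. sdma_engine_id 1, sdma_queue_id 3 (illustrative values):
                 * doorbell_id = idx_offset[1] + 1 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + 1
                 */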
203         } else {
204                 /* For CP queues on SOC15 reserve a free doorbell ID */
205                 unsigned int found;
206
207                 found = find_first_zero_bit(qpd->doorbell_bitmap,
208                                             KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
209                 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
210                         pr_debug("No doorbells available");
211                         return -EBUSY;
212                 }
213                 set_bit(found, qpd->doorbell_bitmap);
214                 q->doorbell_id = found;
215         }
216
217         q->properties.doorbell_off =
218                 kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
219                                           q->doorbell_id);
220
221         return 0;
222 }
223
224 static void deallocate_doorbell(struct qcm_process_device *qpd,
225                                 struct queue *q)
226 {
227         unsigned int old;
228         struct kfd_dev *dev = qpd->dqm->dev;
229
230         if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
231             q->properties.type == KFD_QUEUE_TYPE_SDMA ||
232             q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
233                 return;
234
235         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
236         WARN_ON(!old);
237 }
238
239 static int allocate_vmid(struct device_queue_manager *dqm,
240                         struct qcm_process_device *qpd,
241                         struct queue *q)
242 {
243         int allocated_vmid = -1, i;
244
245         for (i = dqm->dev->vm_info.first_vmid_kfd;
246                         i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
247                 if (!dqm->vmid_pasid[i]) {
248                         allocated_vmid = i;
249                         break;
250                 }
251         }
252
253         if (allocated_vmid < 0) {
254                 pr_err("no more vmid to allocate\n");
255                 return -ENOSPC;
256         }
257
258         pr_debug("vmid allocated: %d\n", allocated_vmid);
259
260         dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
261
262         set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
263
264         qpd->vmid = allocated_vmid;
265         q->properties.vmid = allocated_vmid;
266
267         program_sh_mem_settings(dqm, qpd);
268
269         /* qpd->page_table_base is set earlier when register_process()
270          * is called, i.e. when the first queue is created.
271          */
272         dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
273                         qpd->vmid,
274                         qpd->page_table_base);
275         /* invalidate the VM context after pasid and vmid mapping is set up */
276         kfd_flush_tlb(qpd_to_pdd(qpd));
277
278         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
279                 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
280                                 qpd->sh_hidden_private_base, qpd->vmid);
281
282         return 0;
283 }
284
285 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
286                                 struct qcm_process_device *qpd)
287 {
288         const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
289         int ret;
290
291         if (!qpd->ib_kaddr)
292                 return -ENOMEM;
293
294         ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
295         if (ret)
296                 return ret;
297
298         return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
299                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
300                                 pmf->release_mem_size / sizeof(uint32_t));
301 }
302
303 static void deallocate_vmid(struct device_queue_manager *dqm,
304                                 struct qcm_process_device *qpd,
305                                 struct queue *q)
306 {
307         /* On GFX v7, CP doesn't flush TC at dequeue */
308         if (q->device->device_info->asic_family == CHIP_HAWAII)
309                 if (flush_texture_cache_nocpsch(q->device, qpd))
310                         pr_err("Failed to flush TC\n");
311
312         kfd_flush_tlb(qpd_to_pdd(qpd));
313
314         /* Release the vmid mapping */
315         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
316         dqm->vmid_pasid[qpd->vmid] = 0;
317
318         qpd->vmid = 0;
319         q->properties.vmid = 0;
320 }
321
322 static int create_queue_nocpsch(struct device_queue_manager *dqm,
323                                 struct queue *q,
324                                 struct qcm_process_device *qpd)
325 {
326         struct mqd_manager *mqd_mgr;
327         int retval;
328
329         dqm_lock(dqm);
330
331         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
332                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
333                                 dqm->total_queue_count);
334                 retval = -EPERM;
335                 goto out_unlock;
336         }
337
338         if (list_empty(&qpd->queues_list)) {
339                 retval = allocate_vmid(dqm, qpd, q);
340                 if (retval)
341                         goto out_unlock;
342         }
343         q->properties.vmid = qpd->vmid;
344         /*
345          * Eviction state logic: mark all queues as evicted, even ones
346          * not currently active. Restoring inactive queues later only
347          * updates the is_evicted flag but is a no-op otherwise.
348          */
349         q->properties.is_evicted = !!qpd->evicted;
350
351         q->properties.tba_addr = qpd->tba_addr;
352         q->properties.tma_addr = qpd->tma_addr;
353
354         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
355                         q->properties.type)];
356         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
357                 retval = allocate_hqd(dqm, q);
358                 if (retval)
359                         goto deallocate_vmid;
360                 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
361                         q->pipe, q->queue);
362         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
363                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
364                 retval = allocate_sdma_queue(dqm, q);
365                 if (retval)
366                         goto deallocate_vmid;
367                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
368         }
369
370         retval = allocate_doorbell(qpd, q);
371         if (retval)
372                 goto out_deallocate_hqd;
373
374         /* Temporarily release dqm lock to avoid a circular lock dependency */
375         dqm_unlock(dqm);
376         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
377         dqm_lock(dqm);
378
379         if (!q->mqd_mem_obj) {
380                 retval = -ENOMEM;
381                 goto out_deallocate_doorbell;
382         }
383         mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
384                                 &q->gart_mqd_addr, &q->properties);
385         if (q->properties.is_active) {
386                 if (!dqm->sched_running) {
387                         WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
388                         goto add_queue_to_list;
389                 }
390
391                 if (WARN(q->process->mm != current->mm,
392                                         "should only run in user thread"))
393                         retval = -EFAULT;
394                 else
395                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
396                                         q->queue, &q->properties, current->mm);
397                 if (retval)
398                         goto out_free_mqd;
399         }
400
401 add_queue_to_list:
402         list_add(&q->list, &qpd->queues_list);
403         qpd->queue_count++;
404         if (q->properties.is_active)
405                 increment_queue_count(dqm, q->properties.type);
406
407         /*
408          * Unconditionally increment this counter, regardless of the queue's
409          * type or whether the queue is active.
410          */
411         dqm->total_queue_count++;
412         pr_debug("Total of %d queues are accountable so far\n",
413                         dqm->total_queue_count);
414         goto out_unlock;
415
416 out_free_mqd:
417         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
418 out_deallocate_doorbell:
419         deallocate_doorbell(qpd, q);
420 out_deallocate_hqd:
421         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
422                 deallocate_hqd(dqm, q);
423         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
424                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
425                 deallocate_sdma_queue(dqm, q);
426 deallocate_vmid:
427         if (list_empty(&qpd->queues_list))
428                 deallocate_vmid(dqm, qpd, q);
429 out_unlock:
430         dqm_unlock(dqm);
431         return retval;
432 }
433
434 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
435 {
436         bool set;
437         int pipe, bit, i;
438
439         set = false;
440
441         for (pipe = dqm->next_pipe_to_allocate, i = 0;
442                         i < get_pipes_per_mec(dqm);
443                         pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
444
445                 if (!is_pipe_enabled(dqm, 0, pipe))
446                         continue;
447
448                 if (dqm->allocated_queues[pipe] != 0) {
449                         bit = ffs(dqm->allocated_queues[pipe]) - 1;
450                         dqm->allocated_queues[pipe] &= ~(1 << bit);
451                         q->pipe = pipe;
452                         q->queue = bit;
453                         set = true;
454                         break;
455                 }
456         }
457
458         if (!set)
459                 return -EBUSY;
460
461         pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
462         /* horizontal hqd allocation */
463         dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
464
465         return 0;
466 }
467
468 static inline void deallocate_hqd(struct device_queue_manager *dqm,
469                                 struct queue *q)
470 {
471         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
472 }
473
474 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
475  * to avoid unsynchronized access
476  */
477 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
478                                 struct qcm_process_device *qpd,
479                                 struct queue *q)
480 {
481         int retval;
482         struct mqd_manager *mqd_mgr;
483
484         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
485                         q->properties.type)];
486
487         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
488                 deallocate_hqd(dqm, q);
489         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
490                 deallocate_sdma_queue(dqm, q);
491         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
492                 deallocate_sdma_queue(dqm, q);
493         else {
494                 pr_debug("q->properties.type %d is invalid\n",
495                                 q->properties.type);
496                 return -EINVAL;
497         }
498         dqm->total_queue_count--;
499
500         deallocate_doorbell(qpd, q);
501
502         if (!dqm->sched_running) {
503                 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
504                 return 0;
505         }
506
507         retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
508                                 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
509                                 KFD_UNMAP_LATENCY_MS,
510                                 q->pipe, q->queue);
511         if (retval == -ETIME)
512                 qpd->reset_wavefronts = true;
513
514
515         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
516
517         list_del(&q->list);
518         if (list_empty(&qpd->queues_list)) {
519                 if (qpd->reset_wavefronts) {
520                         pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
521                                         dqm->dev);
522                         /* dbgdev_wave_reset_wavefronts has to be called before
523                          * deallocate_vmid(), i.e. when vmid is still in use.
524                          */
525                         dbgdev_wave_reset_wavefronts(dqm->dev,
526                                         qpd->pqm->process);
527                         qpd->reset_wavefronts = false;
528                 }
529
530                 deallocate_vmid(dqm, qpd, q);
531         }
532         qpd->queue_count--;
533         if (q->properties.is_active) {
534                 decrement_queue_count(dqm, q->properties.type);
535                 if (q->properties.is_gws) {
536                         dqm->gws_queue_count--;
537                         qpd->mapped_gws_queue = false;
538                 }
539         }
540
541         return retval;
542 }
543
544 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
545                                 struct qcm_process_device *qpd,
546                                 struct queue *q)
547 {
548         int retval;
549         uint64_t sdma_val = 0;
550         struct kfd_process_device *pdd = qpd_to_pdd(qpd);
551
552         /* Get the SDMA queue stats */
553         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
554             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
555                 retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
556                                                         &sdma_val);
557                 if (retval)
558                         pr_err("Failed to read SDMA queue counter for queue: %d\n",
559                                 q->properties.queue_id);
560         }
561
562         dqm_lock(dqm);
563         retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
564         if (!retval)
565                 pdd->sdma_past_activity_counter += sdma_val;
566         dqm_unlock(dqm);
567
568         return retval;
569 }
570
571 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
572 {
573         int retval = 0;
574         struct mqd_manager *mqd_mgr;
575         struct kfd_process_device *pdd;
576         bool prev_active = false;
577
578         dqm_lock(dqm);
579         pdd = kfd_get_process_device_data(q->device, q->process);
580         if (!pdd) {
581                 retval = -ENODEV;
582                 goto out_unlock;
583         }
584         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
585                         q->properties.type)];
586
587         /* Save previous activity state for counters */
588         prev_active = q->properties.is_active;
589
590         /* Make sure the queue is unmapped before updating the MQD */
591         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
592                 retval = unmap_queues_cpsch(dqm,
593                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
594                 if (retval) {
595                         pr_err("unmap queue failed\n");
596                         goto out_unlock;
597                 }
598         } else if (prev_active &&
599                    (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
600                     q->properties.type == KFD_QUEUE_TYPE_SDMA ||
601                     q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
602
603                 if (!dqm->sched_running) {
604                         WARN_ONCE(1, "Update non-HWS queue while stopped\n");
605                         goto out_unlock;
606                 }
607
608                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
609                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
610                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
611                 if (retval) {
612                         pr_err("destroy mqd failed\n");
613                         goto out_unlock;
614                 }
615         }
616
617         mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
618
619         /*
620          * Check the new active state against the previous one and adjust
621          * the counter accordingly. map_queues_cpsch uses the
622          * dqm->active_queue_count to determine whether a new runlist must be
623          * uploaded.
624          */
625         if (q->properties.is_active && !prev_active)
626                 increment_queue_count(dqm, q->properties.type);
627         else if (!q->properties.is_active && prev_active)
628                 decrement_queue_count(dqm, q->properties.type);
629
630         if (q->gws && !q->properties.is_gws) {
631                 if (q->properties.is_active) {
632                         dqm->gws_queue_count++;
633                         pdd->qpd.mapped_gws_queue = true;
634                 }
635                 q->properties.is_gws = true;
636         } else if (!q->gws && q->properties.is_gws) {
637                 if (q->properties.is_active) {
638                         dqm->gws_queue_count--;
639                         pdd->qpd.mapped_gws_queue = false;
640                 }
641                 q->properties.is_gws = false;
642         }
643
644         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
645                 retval = map_queues_cpsch(dqm);
646         else if (q->properties.is_active &&
647                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
648                   q->properties.type == KFD_QUEUE_TYPE_SDMA ||
649                   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
650                 if (WARN(q->process->mm != current->mm,
651                          "should only run in user thread"))
652                         retval = -EFAULT;
653                 else
654                         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
655                                                    q->pipe, q->queue,
656                                                    &q->properties, current->mm);
657         }
658
659 out_unlock:
660         dqm_unlock(dqm);
661         return retval;
662 }
663
664 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
665                                         struct qcm_process_device *qpd)
666 {
667         struct queue *q;
668         struct mqd_manager *mqd_mgr;
669         struct kfd_process_device *pdd;
670         int retval, ret = 0;
671
672         dqm_lock(dqm);
673         if (qpd->evicted++ > 0) /* already evicted, do nothing */
674                 goto out;
675
676         pdd = qpd_to_pdd(qpd);
677         pr_info_ratelimited("Evicting PASID 0x%x queues\n",
678                             pdd->process->pasid);
679
680         /* Mark all queues as evicted. Deactivate all active queues on
681          * the qpd.
682          */
683         list_for_each_entry(q, &qpd->queues_list, list) {
684                 q->properties.is_evicted = true;
685                 if (!q->properties.is_active)
686                         continue;
687
688                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
689                                 q->properties.type)];
690                 q->properties.is_active = false;
691                 decrement_queue_count(dqm, q->properties.type);
692                 if (q->properties.is_gws) {
693                         dqm->gws_queue_count--;
694                         qpd->mapped_gws_queue = false;
695                 }
696
697                 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
698                         continue;
699
700                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
701                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
702                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
703                 if (retval && !ret)
704                         /* Return the first error, but keep going to
705                          * maintain a consistent eviction state
706                          */
707                         ret = retval;
708         }
709
710 out:
711         dqm_unlock(dqm);
712         return ret;
713 }
714
715 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
716                                       struct qcm_process_device *qpd)
717 {
718         struct queue *q;
719         struct kfd_process_device *pdd;
720         int retval = 0;
721
722         dqm_lock(dqm);
723         if (qpd->evicted++ > 0) /* already evicted, do nothing */
724                 goto out;
725
726         pdd = qpd_to_pdd(qpd);
727         pr_info_ratelimited("Evicting PASID 0x%x queues\n",
728                             pdd->process->pasid);
729
730         /* Mark all queues as evicted. Deactivate all active queues on
731          * the qpd.
732          */
733         list_for_each_entry(q, &qpd->queues_list, list) {
734                 q->properties.is_evicted = true;
735                 if (!q->properties.is_active)
736                         continue;
737
738                 q->properties.is_active = false;
739                 decrement_queue_count(dqm, q->properties.type);
740         }
741         retval = execute_queues_cpsch(dqm,
742                                 qpd->is_debug ?
743                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
744                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
745
746 out:
747         dqm_unlock(dqm);
748         return retval;
749 }
750
751 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
752                                           struct qcm_process_device *qpd)
753 {
754         struct mm_struct *mm = NULL;
755         struct queue *q;
756         struct mqd_manager *mqd_mgr;
757         struct kfd_process_device *pdd;
758         uint64_t pd_base;
759         int retval, ret = 0;
760
761         pdd = qpd_to_pdd(qpd);
762         /* Retrieve PD base */
763         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
764
765         dqm_lock(dqm);
766         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
767                 goto out;
768         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
769                 qpd->evicted--;
770                 goto out;
771         }
772
773         pr_info_ratelimited("Restoring PASID 0x%x queues\n",
774                             pdd->process->pasid);
775
776         /* Update PD Base in QPD */
777         qpd->page_table_base = pd_base;
778         pr_debug("Updated PD address to 0x%llx\n", pd_base);
779
780         if (!list_empty(&qpd->queues_list)) {
781                 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
782                                 dqm->dev->kgd,
783                                 qpd->vmid,
784                                 qpd->page_table_base);
785                 kfd_flush_tlb(pdd);
786         }
787
788         /* Take a safe reference to the mm_struct, which may otherwise
789          * disappear even while the kfd_process is still referenced.
790          */
791         mm = get_task_mm(pdd->process->lead_thread);
792         if (!mm) {
793                 ret = -EFAULT;
794                 goto out;
795         }
796
797         /* Clear the eviction flags. Re-activate queues unless they are
798          * inactive for some other reason.
799          */
800         list_for_each_entry(q, &qpd->queues_list, list) {
801                 q->properties.is_evicted = false;
802                 if (!QUEUE_IS_ACTIVE(q->properties))
803                         continue;
804
805                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
806                                 q->properties.type)];
807                 q->properties.is_active = true;
808                 increment_queue_count(dqm, q->properties.type);
809                 if (q->properties.is_gws) {
810                         dqm->gws_queue_count++;
811                         qpd->mapped_gws_queue = true;
812                 }
813
814                 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
815                         continue;
816
817                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
818                                        q->queue, &q->properties, mm);
819                 if (retval && !ret)
820                         /* Return the first error, but keep going to
821                          * maintain a consistent eviction state
822                          */
823                         ret = retval;
824         }
825         qpd->evicted = 0;
826 out:
827         if (mm)
828                 mmput(mm);
829         dqm_unlock(dqm);
830         return ret;
831 }
832
833 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
834                                         struct qcm_process_device *qpd)
835 {
836         struct queue *q;
837         struct kfd_process_device *pdd;
838         uint64_t pd_base;
839         int retval = 0;
840
841         pdd = qpd_to_pdd(qpd);
842         /* Retrieve PD base */
843         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
844
845         dqm_lock(dqm);
846         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
847                 goto out;
848         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
849                 qpd->evicted--;
850                 goto out;
851         }
852
853         pr_info_ratelimited("Restoring PASID 0x%x queues\n",
854                             pdd->process->pasid);
855
856         /* Update PD Base in QPD */
857         qpd->page_table_base = pd_base;
858         pr_debug("Updated PD address to 0x%llx\n", pd_base);
859
860         /* activate all active queues on the qpd */
861         list_for_each_entry(q, &qpd->queues_list, list) {
862                 q->properties.is_evicted = false;
863                 if (!QUEUE_IS_ACTIVE(q->properties))
864                         continue;
865
866                 q->properties.is_active = true;
867                 increment_queue_count(dqm, q->properties.type);
868         }
869         retval = execute_queues_cpsch(dqm,
870                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
871         qpd->evicted = 0;
872 out:
873         dqm_unlock(dqm);
874         return retval;
875 }
876
877 static int register_process(struct device_queue_manager *dqm,
878                                         struct qcm_process_device *qpd)
879 {
880         struct device_process_node *n;
881         struct kfd_process_device *pdd;
882         uint64_t pd_base;
883         int retval;
884
885         n = kzalloc(sizeof(*n), GFP_KERNEL);
886         if (!n)
887                 return -ENOMEM;
888
889         n->qpd = qpd;
890
891         pdd = qpd_to_pdd(qpd);
892         /* Retrieve PD base */
893         pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
894
895         dqm_lock(dqm);
896         list_add(&n->list, &dqm->queues);
897
898         /* Update PD Base in QPD */
899         qpd->page_table_base = pd_base;
900         pr_debug("Updated PD address to 0x%llx\n", pd_base);
901
902         retval = dqm->asic_ops.update_qpd(dqm, qpd);
903
904         dqm->processes_count++;
905
906         dqm_unlock(dqm);
907
908         /* Outside the DQM lock because under the DQM lock we can't do
909          * reclaim or take other locks that others hold while reclaiming.
910          */
911         kfd_inc_compute_active(dqm->dev);
912
913         return retval;
914 }
915
916 static int unregister_process(struct device_queue_manager *dqm,
917                                         struct qcm_process_device *qpd)
918 {
919         int retval;
920         struct device_process_node *cur, *next;
921
922         pr_debug("qpd->queues_list is %s\n",
923                         list_empty(&qpd->queues_list) ? "empty" : "not empty");
924
925         retval = 0;
926         dqm_lock(dqm);
927
928         list_for_each_entry_safe(cur, next, &dqm->queues, list) {
929                 if (qpd == cur->qpd) {
930                         list_del(&cur->list);
931                         kfree(cur);
932                         dqm->processes_count--;
933                         goto out;
934                 }
935         }
936         /* qpd not found in dqm list */
937         retval = 1;
938 out:
939         dqm_unlock(dqm);
940
941         /* Outside the DQM lock because under the DQM lock we can't do
942          * reclaim or take other locks that others hold while reclaiming.
943          */
944         if (!retval)
945                 kfd_dec_compute_active(dqm->dev);
946
947         return retval;
948 }
949
950 static int
951 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
952                         unsigned int vmid)
953 {
954         return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
955                                                 dqm->dev->kgd, pasid, vmid);
956 }
957
958 static void init_interrupts(struct device_queue_manager *dqm)
959 {
960         unsigned int i;
961
962         for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
963                 if (is_pipe_enabled(dqm, 0, i))
964                         dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
965 }
966
967 static int initialize_nocpsch(struct device_queue_manager *dqm)
968 {
969         int pipe, queue;
970
971         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
972
973         dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
974                                         sizeof(unsigned int), GFP_KERNEL);
975         if (!dqm->allocated_queues)
976                 return -ENOMEM;
977
978         mutex_init(&dqm->lock_hidden);
979         INIT_LIST_HEAD(&dqm->queues);
980         dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
981         dqm->active_cp_queue_count = 0;
982         dqm->gws_queue_count = 0;
983
984         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
985                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
986
987                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
988                         if (test_bit(pipe_offset + queue,
989                                      dqm->dev->shared_resources.cp_queue_bitmap))
990                                 dqm->allocated_queues[pipe] |= 1 << queue;
991         }
992
993         memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
994
995         dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
996         dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
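        /* e.g. 16 SDMA queues: ~0ULL >> (64 - 16) = 0xffff, i.e. one free bit
         * per queue; the XGMI SDMA bitmap uses the same scheme.
         */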
997
998         return 0;
999 }
1000
1001 static void uninitialize(struct device_queue_manager *dqm)
1002 {
1003         int i;
1004
1005         WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1006
1007         kfree(dqm->allocated_queues);
1008         for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1009                 kfree(dqm->mqd_mgrs[i]);
1010         mutex_destroy(&dqm->lock_hidden);
1011 }
1012
1013 static int start_nocpsch(struct device_queue_manager *dqm)
1014 {
1015         pr_info("SW scheduler is used");
1016         init_interrupts(dqm);
1017
1018         if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1019                 return pm_init(&dqm->packets, dqm);
1020         dqm->sched_running = true;
1021
1022         return 0;
1023 }
1024
1025 static int stop_nocpsch(struct device_queue_manager *dqm)
1026 {
1027         if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1028                 pm_uninit(&dqm->packets, false);
1029         dqm->sched_running = false;
1030
1031         return 0;
1032 }
1033
1034 static void pre_reset(struct device_queue_manager *dqm)
1035 {
1036         dqm_lock(dqm);
1037         dqm->is_resetting = true;
1038         dqm_unlock(dqm);
1039 }
1040
1041 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1042                                 struct queue *q)
1043 {
1044         int bit;
1045
1046         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1047                 if (dqm->sdma_bitmap == 0) {
1048                         pr_err("No more SDMA queue to allocate\n");
1049                         return -ENOMEM;
1050                 }
1051
1052                 bit = __ffs64(dqm->sdma_bitmap);
1053                 dqm->sdma_bitmap &= ~(1ULL << bit);
1054                 q->sdma_id = bit;
1055                 q->properties.sdma_engine_id = q->sdma_id %
1056                                 get_num_sdma_engines(dqm);
1057                 q->properties.sdma_queue_id = q->sdma_id /
1058                                 get_num_sdma_engines(dqm);
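                /* e.g. with 2 PCIe-optimized engines (illustrative), sdma_id 5
                 * maps to engine 5 % 2 = 1 and queue 5 / 2 = 2 on that engine
                 */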
1059         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1060                 if (dqm->xgmi_sdma_bitmap == 0) {
1061                         pr_err("No more XGMI SDMA queue to allocate\n");
1062                         return -ENOMEM;
1063                 }
1064                 bit = __ffs64(dqm->xgmi_sdma_bitmap);
1065                 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1066                 q->sdma_id = bit;
1067                 /* sdma_engine_id is a global SDMA id that covers
1068                  * both PCIe-optimized SDMAs and XGMI-
1069                  * optimized SDMAs. The calculation below
1070                  * assumes the first N engines are always
1071                  * the PCIe-optimized ones.
1072                  */
1073                 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
1074                                 q->sdma_id % get_num_xgmi_sdma_engines(dqm);
1075                 q->properties.sdma_queue_id = q->sdma_id /
1076                                 get_num_xgmi_sdma_engines(dqm);
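                /* e.g. with 2 PCIe-optimized and 6 XGMI-optimized engines
                 * (illustrative), sdma_id 7 maps to engine 2 + 7 % 6 = 3 and
                 * queue 7 / 6 = 1
                 */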
1077         }
1078
1079         pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1080         pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1081
1082         return 0;
1083 }
1084
1085 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1086                                 struct queue *q)
1087 {
1088         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1089                 if (q->sdma_id >= get_num_sdma_queues(dqm))
1090                         return;
1091                 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1092         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1093                 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1094                         return;
1095                 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1096         }
1097 }
1098
1099 /*
1100  * Device Queue Manager implementation for cp scheduler
1101  */
1102
1103 static int set_sched_resources(struct device_queue_manager *dqm)
1104 {
1105         int i, mec;
1106         struct scheduling_resources res;
1107
1108         res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1109
1110         res.queue_mask = 0;
1111         for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1112                 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1113                         / dqm->dev->shared_resources.num_pipe_per_mec;
1114
1115                 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1116                         continue;
1117
1118                 /* only acquire queues from the first MEC */
1119                 if (mec > 0)
1120                         continue;
1121
1122                 /* This situation may be hit in the future if a new HW
1123                  * generation exposes more than 64 queues. If so, the
1124                  * definition of res.queue_mask needs updating
1125                  */
1126                 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1127                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1128                         break;
1129                 }
1130
1131                 res.queue_mask |= 1ull
1132                         << amdgpu_queue_mask_bit_to_set_resource_bit(
1133                                 (struct amdgpu_device *)dqm->dev->kgd, i);
1134         }
1135         res.gws_mask = ~0ull;
1136         res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1137
1138         pr_debug("Scheduling resources:\n"
1139                         "vmid mask: 0x%8X\n"
1140                         "queue mask: 0x%8llX\n",
1141                         res.vmid_mask, res.queue_mask);
1142
1143         return pm_send_set_resources(&dqm->packets, &res);
1144 }
1145
1146 static int initialize_cpsch(struct device_queue_manager *dqm)
1147 {
1148         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1149
1150         mutex_init(&dqm->lock_hidden);
1151         INIT_LIST_HEAD(&dqm->queues);
1152         dqm->active_queue_count = dqm->processes_count = 0;
1153         dqm->active_cp_queue_count = 0;
1154         dqm->gws_queue_count = 0;
1155         dqm->active_runlist = false;
1156         dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1157         dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1158
1159         INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1160
1161         return 0;
1162 }
1163
1164 static int start_cpsch(struct device_queue_manager *dqm)
1165 {
1166         int retval;
1167
1168         retval = 0;
1169
1170         retval = pm_init(&dqm->packets, dqm);
1171         if (retval)
1172                 goto fail_packet_manager_init;
1173
1174         retval = set_sched_resources(dqm);
1175         if (retval)
1176                 goto fail_set_sched_resources;
1177
1178         pr_debug("Allocating fence memory\n");
1179
1180         /* allocate fence memory on the gart */
1181         retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1182                                         &dqm->fence_mem);
1183
1184         if (retval)
1185                 goto fail_allocate_vidmem;
1186
1187         dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1188         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1189
1190         init_interrupts(dqm);
1191
1192         dqm_lock(dqm);
1193         /* clear hang status when the driver tries to start the hw scheduler */
1194         dqm->is_hws_hang = false;
1195         dqm->is_resetting = false;
1196         dqm->sched_running = true;
1197         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1198         dqm_unlock(dqm);
1199
1200         return 0;
1201 fail_allocate_vidmem:
1202 fail_set_sched_resources:
1203         pm_uninit(&dqm->packets, false);
1204 fail_packet_manager_init:
1205         return retval;
1206 }
1207
1208 static int stop_cpsch(struct device_queue_manager *dqm)
1209 {
1210         bool hanging;
1211
1212         dqm_lock(dqm);
1213         if (!dqm->is_hws_hang)
1214                 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1215         hanging = dqm->is_hws_hang || dqm->is_resetting;
1216         dqm->sched_running = false;
1217         dqm_unlock(dqm);
1218
1219         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1220         pm_uninit(&dqm->packets, hanging);
1221
1222         return 0;
1223 }
1224
1225 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1226                                         struct kernel_queue *kq,
1227                                         struct qcm_process_device *qpd)
1228 {
1229         dqm_lock(dqm);
1230         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1231                 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1232                                 dqm->total_queue_count);
1233                 dqm_unlock(dqm);
1234                 return -EPERM;
1235         }
1236
1237         /*
1238          * Unconditionally increment this counter, regardless of the queue's
1239          * type or whether the queue is active.
1240          */
1241         dqm->total_queue_count++;
1242         pr_debug("Total of %d queues are accountable so far\n",
1243                         dqm->total_queue_count);
1244
1245         list_add(&kq->list, &qpd->priv_queue_list);
1246         increment_queue_count(dqm, kq->queue->properties.type);
1247         qpd->is_debug = true;
1248         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1249         dqm_unlock(dqm);
1250
1251         return 0;
1252 }
1253
1254 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1255                                         struct kernel_queue *kq,
1256                                         struct qcm_process_device *qpd)
1257 {
1258         dqm_lock(dqm);
1259         list_del(&kq->list);
1260         decrement_queue_count(dqm, kq->queue->properties.type);
1261         qpd->is_debug = false;
1262         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1263         /*
1264          * Unconditionally decrement this counter, regardless of the queue's
1265          * type.
1266          */
1267         dqm->total_queue_count--;
1268         pr_debug("Total of %d queues are accountable so far\n",
1269                         dqm->total_queue_count);
1270         dqm_unlock(dqm);
1271 }
1272
1273 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1274                         struct qcm_process_device *qpd)
1275 {
1276         int retval;
1277         struct mqd_manager *mqd_mgr;
1278
1279         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1280                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1281                                 dqm->total_queue_count);
1282                 retval = -EPERM;
1283                 goto out;
1284         }
1285
1286         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1287                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1288                 dqm_lock(dqm);
1289                 retval = allocate_sdma_queue(dqm, q);
1290                 dqm_unlock(dqm);
1291                 if (retval)
1292                         goto out;
1293         }
1294
1295         retval = allocate_doorbell(qpd, q);
1296         if (retval)
1297                 goto out_deallocate_sdma_queue;
1298
1299         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1300                         q->properties.type)];
1301
1302         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1303                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1304                 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1305         q->properties.tba_addr = qpd->tba_addr;
1306         q->properties.tma_addr = qpd->tma_addr;
1307         q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1308         if (!q->mqd_mem_obj) {
1309                 retval = -ENOMEM;
1310                 goto out_deallocate_doorbell;
1311         }
1312
1313         dqm_lock(dqm);
1314         /*
1315          * Eviction state logic: mark all queues as evicted, even ones
1316          * not currently active. Restoring inactive queues later only
1317          * updates the is_evicted flag but is a no-op otherwise.
1318          */
1319         q->properties.is_evicted = !!qpd->evicted;
1320         mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1321                                 &q->gart_mqd_addr, &q->properties);
1322
1323         list_add(&q->list, &qpd->queues_list);
1324         qpd->queue_count++;
1325
1326         if (q->properties.is_active) {
1327                 increment_queue_count(dqm, q->properties.type);
1328
1329                 retval = execute_queues_cpsch(dqm,
1330                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1331         }
1332
1333         /*
1334          * Unconditionally increment this counter, regardless of the queue's
1335          * type or whether the queue is active.
1336          */
1337         dqm->total_queue_count++;
1338
1339         pr_debug("Total of %d queues are accountable so far\n",
1340                         dqm->total_queue_count);
1341
1342         dqm_unlock(dqm);
1343         return retval;
1344
1345 out_deallocate_doorbell:
1346         deallocate_doorbell(qpd, q);
1347 out_deallocate_sdma_queue:
1348         if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1349                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1350                 dqm_lock(dqm);
1351                 deallocate_sdma_queue(dqm, q);
1352                 dqm_unlock(dqm);
1353         }
1354 out:
1355         return retval;
1356 }
1357
1358 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1359                                 unsigned int fence_value,
1360                                 unsigned int timeout_ms)
1361 {
1362         unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1363
1364         while (*fence_addr != fence_value) {
1365                 if (time_after(jiffies, end_jiffies)) {
1366                         pr_err("qcm fence wait loop timeout expired\n");
1367                         /* In the HWS case, this is used to halt the driver thread
1368                          * in order not to mess up CP states before doing
1369                          * scandumps for FW debugging.
1370                          */
1371                         while (halt_if_hws_hang)
1372                                 schedule();
1373
1374                         return -ETIME;
1375                 }
1376                 schedule();
1377         }
1378
1379         return 0;
1380 }
1381
1382 /* dqm->lock mutex has to be locked before calling this function */
1383 static int map_queues_cpsch(struct device_queue_manager *dqm)
1384 {
1385         int retval;
1386
1387         if (!dqm->sched_running)
1388                 return 0;
1389         if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1390                 return 0;
1391         if (dqm->active_runlist)
1392                 return 0;
1393
1394         retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1395         pr_debug("%s sent runlist\n", __func__);
1396         if (retval) {
1397                 pr_err("failed to execute runlist\n");
1398                 return retval;
1399         }
1400         dqm->active_runlist = true;
1401
1402         return retval;
1403 }
1404
1405 /* dqm->lock mutex has to be locked before calling this function */
1406 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1407                                 enum kfd_unmap_queues_filter filter,
1408                                 uint32_t filter_param)
1409 {
1410         int retval = 0;
1411
1412         if (!dqm->sched_running)
1413                 return 0;
1414         if (dqm->is_hws_hang)
1415                 return -EIO;
1416         if (!dqm->active_runlist)
1417                 return retval;
1418
1419         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1420                         filter, filter_param, false, 0);
1421         if (retval)
1422                 return retval;
1423
1424         *dqm->fence_addr = KFD_FENCE_INIT;
1425         pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1426                                 KFD_FENCE_COMPLETED);
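        /* pm_send_query_status asks the CP to write KFD_FENCE_COMPLETED to
         * fence_addr once the preceding unmap has been processed.
         */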
1427         /* Wait for the fence write; bail out if the CP does not respond in time */
1428         retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1429                                 queue_preemption_timeout_ms);
1430         if (retval) {
1431                 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1432                 dqm->is_hws_hang = true;
1433                 /* It's possible we're detecting a HWS hang in the
1434                  * middle of a GPU reset. No need to schedule another
1435                  * reset in this case.
1436                  */
1437                 if (!dqm->is_resetting)
1438                         schedule_work(&dqm->hw_exception_work);
1439                 return retval;
1440         }
1441
1442         pm_release_ib(&dqm->packets);
1443         dqm->active_runlist = false;
1444
1445         return retval;
1446 }
1447
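/*
 * Re-run the scheduler: unmap the queues selected by the filter and
 * then map the updated queue lists again.
 */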
1448 /* dqm->lock mutex has to be locked before calling this function */
1449 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1450                                 enum kfd_unmap_queues_filter filter,
1451                                 uint32_t filter_param)
1452 {
1453         int retval;
1454
1455         if (dqm->is_hws_hang)
1456                 return -EIO;
1457         retval = unmap_queues_cpsch(dqm, filter, filter_param);
1458         if (retval)
1459                 return retval;
1460
1461         return map_queues_cpsch(dqm);
1462 }
1463
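/*
 * Destroy a user queue under HWS: snapshot its SDMA activity counter if
 * applicable, drop the queue from the process lists, re-execute the
 * runlist so the queue is preempted, and finally free its MQD outside
 * the DQM lock.
 */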
1464 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1465                                 struct qcm_process_device *qpd,
1466                                 struct queue *q)
1467 {
1468         int retval;
1469         struct mqd_manager *mqd_mgr;
1470         uint64_t sdma_val = 0;
1471         struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1472
1473         /* Get the SDMA queue stats */
1474         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1475             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1476                 retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
1477                                                         &sdma_val);
1478                 if (retval)
1479                         pr_err("Failed to read SDMA queue counter for queue: %d\n",
1480                                 q->properties.queue_id);
1481         }
1482
1483         retval = 0;
1484
1485         /* remove queue from list to prevent rescheduling after preemption */
1486         dqm_lock(dqm);
1487
1488         if (qpd->is_debug) {
1489                 /*
1490                  * Error: we currently do not allow destroying a queue
1491                  * of a process that is being debugged.
1492                  */
1493                 retval = -EBUSY;
1494                 goto failed_try_destroy_debugged_queue;
1495
1496         }
1497
1498         mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1499                         q->properties.type)];
1500
1501         deallocate_doorbell(qpd, q);
1502
1503         if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1504             (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1505                 deallocate_sdma_queue(dqm, q);
1506                 pdd->sdma_past_activity_counter += sdma_val;
1507         }
1508
1509         list_del(&q->list);
1510         qpd->queue_count--;
1511         if (q->properties.is_active) {
1512                 decrement_queue_count(dqm, q->properties.type);
1513                 retval = execute_queues_cpsch(dqm,
1514                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1515                 if (retval == -ETIME)
1516                         qpd->reset_wavefronts = true;
1517                 if (q->properties.is_gws) {
1518                         dqm->gws_queue_count--;
1519                         qpd->mapped_gws_queue = false;
1520                 }
1521         }
1522
1523         /*
1524          * Unconditionally decrement this counter, regardless of the queue's
1525          * type
1526          */
1527         dqm->total_queue_count--;
1528         pr_debug("Total of %d queues are accountable so far\n",
1529                         dqm->total_queue_count);
1530
1531         dqm_unlock(dqm);
1532
1533         /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1534         mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1535
1536         return retval;
1537
1538 failed_try_destroy_debugged_queue:
1539
1540         dqm_unlock(dqm);
1541         return retval;
1542 }
1543
1544 /*
1545  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1546  * stay in user mode.
1547  */
1548 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1549 /* APE1 limit is inclusive and 64K aligned. */
1550 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1551
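/*
 * Set the default/alternate cache policy and the APE1 aperture for a
 * process.  A zero aperture size disables APE1 (base > limit); otherwise
 * the 64K-aligned base and limit are checked against the SH_MEM_APE1_*
 * encoding and stored right-shifted by 16 bits.
 */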
1552 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1553                                    struct qcm_process_device *qpd,
1554                                    enum cache_policy default_policy,
1555                                    enum cache_policy alternate_policy,
1556                                    void __user *alternate_aperture_base,
1557                                    uint64_t alternate_aperture_size)
1558 {
1559         bool retval = true;
1560
1561         if (!dqm->asic_ops.set_cache_memory_policy)
1562                 return retval;
1563
1564         dqm_lock(dqm);
1565
1566         if (alternate_aperture_size == 0) {
1567                 /* base > limit disables APE1 */
1568                 qpd->sh_mem_ape1_base = 1;
1569                 qpd->sh_mem_ape1_limit = 0;
1570         } else {
1571                 /*
1572                  * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1573                  *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
1574                  * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1575                  *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1576                  * Verify that the base and size parameters can be
1577                  * represented in this format and convert them.
1578                  * Additionally restrict APE1 to user-mode addresses.
1579                  */
1580
1581                 uint64_t base = (uintptr_t)alternate_aperture_base;
1582                 uint64_t limit = base + alternate_aperture_size - 1;
1583
1584                 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1585                    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1586                         retval = false;
1587                         goto out;
1588                 }
1589
1590                 qpd->sh_mem_ape1_base = base >> 16;
1591                 qpd->sh_mem_ape1_limit = limit >> 16;
1592         }
1593
1594         retval = dqm->asic_ops.set_cache_memory_policy(
1595                         dqm,
1596                         qpd,
1597                         default_policy,
1598                         alternate_policy,
1599                         alternate_aperture_base,
1600                         alternate_aperture_size);
1601
1602         if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1603                 program_sh_mem_settings(dqm, qpd);
1604
1605         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1606                 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1607                 qpd->sh_mem_ape1_limit);
1608
1609 out:
1610         dqm_unlock(dqm);
1611         return retval;
1612 }
1613
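/*
 * Install a user trap handler.  With CWSR enabled, the user TBA/TMA
 * addresses are written into the CWSR TMA region so the CWSR trap
 * handler can chain to the user trap handler; otherwise they are
 * recorded in the qpd.
 */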
1614 static int set_trap_handler(struct device_queue_manager *dqm,
1615                                 struct qcm_process_device *qpd,
1616                                 uint64_t tba_addr,
1617                                 uint64_t tma_addr)
1618 {
1619         uint64_t *tma;
1620
1621         if (dqm->dev->cwsr_enabled) {
1622                 /* Jump from CWSR trap handler to user trap */
1623                 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1624                 tma[0] = tba_addr;
1625                 tma[1] = tma_addr;
1626         } else {
1627                 qpd->tba_addr = tba_addr;
1628                 qpd->tma_addr = tma_addr;
1629         }
1630
1631         return 0;
1632 }
1633
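/*
 * Tear down all queues of a terminating process (no-HWS path) and
 * unregister the process from the DQM.
 */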
1634 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1635                 struct qcm_process_device *qpd)
1636 {
1637         struct queue *q, *next;
1638         struct device_process_node *cur, *next_dpn;
1639         int retval = 0;
1640         bool found = false;
1641
1642         dqm_lock(dqm);
1643
1644         /* Clear all user mode queues */
1645         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1646                 int ret;
1647
1648                 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1649                 if (ret)
1650                         retval = ret;
1651         }
1652
1653         /* Unregister process */
1654         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1655                 if (qpd == cur->qpd) {
1656                         list_del(&cur->list);
1657                         kfree(cur);
1658                         dqm->processes_count--;
1659                         found = true;
1660                         break;
1661                 }
1662         }
1663
1664         dqm_unlock(dqm);
1665
1666         /* Outside the DQM lock because under the DQM lock we can't do
1667          * reclaim or take other locks that others hold while reclaiming.
1668          */
1669         if (found)
1670                 kfd_dec_compute_active(dqm->dev);
1671
1672         return retval;
1673 }
1674
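/*
 * Report the wave state of a queue: copy its saved control stack to a
 * user buffer and return the control-stack and save-area sizes used.
 * Only valid for an inactive compute queue on a device with CWSR
 * enabled.
 */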
1675 static int get_wave_state(struct device_queue_manager *dqm,
1676                           struct queue *q,
1677                           void __user *ctl_stack,
1678                           u32 *ctl_stack_used_size,
1679                           u32 *save_area_used_size)
1680 {
1681         struct mqd_manager *mqd_mgr;
1682         int r;
1683
1684         dqm_lock(dqm);
1685
1686         if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1687             q->properties.is_active || !q->device->cwsr_enabled) {
1688                 r = -EINVAL;
1689                 goto dqm_unlock;
1690         }
1691
1692         mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1693
1694         if (!mqd_mgr->get_wave_state) {
1695                 r = -EINVAL;
1696                 goto dqm_unlock;
1697         }
1698
1699         r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1700                         ctl_stack_used_size, save_area_used_size);
1701
1702 dqm_unlock:
1703         dqm_unlock(dqm);
1704         return r;
1705 }
1706
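/*
 * Tear down all kernel and user queues of a terminating process (HWS
 * path), preempt them from the HW scheduler, reset wavefronts if the
 * preemption failed, and free the MQDs outside the DQM lock.
 */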
1707 static int process_termination_cpsch(struct device_queue_manager *dqm,
1708                 struct qcm_process_device *qpd)
1709 {
1710         int retval;
1711         struct queue *q, *next;
1712         struct kernel_queue *kq, *kq_next;
1713         struct mqd_manager *mqd_mgr;
1714         struct device_process_node *cur, *next_dpn;
1715         enum kfd_unmap_queues_filter filter =
1716                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1717         bool found = false;
1718
1719         retval = 0;
1720
1721         dqm_lock(dqm);
1722
1723         /* Clean all kernel queues */
1724         list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1725                 list_del(&kq->list);
1726                 decrement_queue_count(dqm, kq->queue->properties.type);
1727                 qpd->is_debug = false;
1728                 dqm->total_queue_count--;
1729                 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1730         }
1731
1732         /* Clear all user mode queues */
1733         list_for_each_entry(q, &qpd->queues_list, list) {
1734                 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1735                         deallocate_sdma_queue(dqm, q);
1736                 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1737                         deallocate_sdma_queue(dqm, q);
1738
1739                 if (q->properties.is_active) {
1740                         decrement_queue_count(dqm, q->properties.type);
1741                         if (q->properties.is_gws) {
1742                                 dqm->gws_queue_count--;
1743                                 qpd->mapped_gws_queue = false;
1744                         }
1745                 }
1746
1747                 dqm->total_queue_count--;
1748         }
1749
1750         /* Unregister process */
1751         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1752                 if (qpd == cur->qpd) {
1753                         list_del(&cur->list);
1754                         kfree(cur);
1755                         dqm->processes_count--;
1756                         found = true;
1757                         break;
1758                 }
1759         }
1760
1761         retval = execute_queues_cpsch(dqm, filter, 0);
1762         if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1763                 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1764                 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1765                 qpd->reset_wavefronts = false;
1766         }
1767
1768         dqm_unlock(dqm);
1769
1770         /* Outside the DQM lock because under the DQM lock we can't do
1771          * reclaim or take other locks that others hold while reclaiming.
1772          */
1773         if (found)
1774                 kfd_dec_compute_active(dqm->dev);
1775
1776         /* Lastly, free mqd resources.
1777          * Do free_mqd() after dqm_unlock to avoid circular locking.
1778          */
1779         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1780                 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1781                                 q->properties.type)];
1782                 list_del(&q->list);
1783                 qpd->queue_count--;
1784                 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1785         }
1786
1787         return retval;
1788 }
1789
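/* Instantiate one MQD manager per MQD type; free them all on failure. */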
1790 static int init_mqd_managers(struct device_queue_manager *dqm)
1791 {
1792         int i, j;
1793         struct mqd_manager *mqd_mgr;
1794
1795         for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1796                 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1797                 if (!mqd_mgr) {
1798                         pr_err("mqd manager [%d] initialization failed\n", i);
1799                         goto out_free;
1800                 }
1801                 dqm->mqd_mgrs[i] = mqd_mgr;
1802         }
1803
1804         return 0;
1805
1806 out_free:
1807         for (j = 0; j < i; j++) {
1808                 kfree(dqm->mqd_mgrs[j]);
1809                 dqm->mqd_mgrs[j] = NULL;
1810         }
1811
1812         return -ENOMEM;
1813 }
1814
1815 /* Allocate one HIQ MQD (for HWS) and all SDMA MQDs in one contiguous chunk */
1816 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1817 {
1818         int retval;
1819         struct kfd_dev *dev = dqm->dev;
1820         struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
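        /* One MQD per SDMA engine/queue pair, plus one HIQ MQD. */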
1821         uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1822                 get_num_all_sdma_engines(dqm) *
1823                 dev->device_info->num_sdma_queues_per_engine +
1824                 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1825
1826         retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1827                 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1828                 (void *)&(mem_obj->cpu_ptr), false);
1829
1830         return retval;
1831 }
1832
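/*
 * Create and initialize a device queue manager: pick the scheduling
 * policy (forced to no-HWS on Hawaii and Tonga), wire up the per-policy
 * ops table and the per-ASIC callbacks, and allocate the MQD managers
 * and the HIQ/SDMA MQD buffer.
 */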
1833 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1834 {
1835         struct device_queue_manager *dqm;
1836
1837         pr_debug("Loading device queue manager\n");
1838
1839         dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1840         if (!dqm)
1841                 return NULL;
1842
1843         switch (dev->device_info->asic_family) {
1844         /* HWS is not available on Hawaii. */
1845         case CHIP_HAWAII:
1846         /* HWS depends on CWSR for timely dequeue. CWSR is not
1847          * available on Tonga.
1848          *
1849          * FIXME: This argument also applies to Kaveri.
1850          */
1851         case CHIP_TONGA:
1852                 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1853                 break;
1854         default:
1855                 dqm->sched_policy = sched_policy;
1856                 break;
1857         }
1858
1859         dqm->dev = dev;
1860         switch (dqm->sched_policy) {
1861         case KFD_SCHED_POLICY_HWS:
1862         case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1863                 /* initialize dqm for cp scheduling */
1864                 dqm->ops.create_queue = create_queue_cpsch;
1865                 dqm->ops.initialize = initialize_cpsch;
1866                 dqm->ops.start = start_cpsch;
1867                 dqm->ops.stop = stop_cpsch;
1868                 dqm->ops.pre_reset = pre_reset;
1869                 dqm->ops.destroy_queue = destroy_queue_cpsch;
1870                 dqm->ops.update_queue = update_queue;
1871                 dqm->ops.register_process = register_process;
1872                 dqm->ops.unregister_process = unregister_process;
1873                 dqm->ops.uninitialize = uninitialize;
1874                 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1875                 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1876                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1877                 dqm->ops.set_trap_handler = set_trap_handler;
1878                 dqm->ops.process_termination = process_termination_cpsch;
1879                 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1880                 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1881                 dqm->ops.get_wave_state = get_wave_state;
1882                 break;
1883         case KFD_SCHED_POLICY_NO_HWS:
1884                 /* initialize dqm for no cp scheduling */
1885                 dqm->ops.start = start_nocpsch;
1886                 dqm->ops.stop = stop_nocpsch;
1887                 dqm->ops.pre_reset = pre_reset;
1888                 dqm->ops.create_queue = create_queue_nocpsch;
1889                 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1890                 dqm->ops.update_queue = update_queue;
1891                 dqm->ops.register_process = register_process;
1892                 dqm->ops.unregister_process = unregister_process;
1893                 dqm->ops.initialize = initialize_nocpsch;
1894                 dqm->ops.uninitialize = uninitialize;
1895                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1896                 dqm->ops.set_trap_handler = set_trap_handler;
1897                 dqm->ops.process_termination = process_termination_nocpsch;
1898                 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1899                 dqm->ops.restore_process_queues =
1900                         restore_process_queues_nocpsch;
1901                 dqm->ops.get_wave_state = get_wave_state;
1902                 break;
1903         default:
1904                 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1905                 goto out_free;
1906         }
1907
1908         switch (dev->device_info->asic_family) {
1909         case CHIP_CARRIZO:
1910                 device_queue_manager_init_vi(&dqm->asic_ops);
1911                 break;
1912
1913         case CHIP_KAVERI:
1914                 device_queue_manager_init_cik(&dqm->asic_ops);
1915                 break;
1916
1917         case CHIP_HAWAII:
1918                 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1919                 break;
1920
1921         case CHIP_TONGA:
1922         case CHIP_FIJI:
1923         case CHIP_POLARIS10:
1924         case CHIP_POLARIS11:
1925         case CHIP_POLARIS12:
1926         case CHIP_VEGAM:
1927                 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1928                 break;
1929
1930         case CHIP_VEGA10:
1931         case CHIP_VEGA12:
1932         case CHIP_VEGA20:
1933         case CHIP_RAVEN:
1934         case CHIP_RENOIR:
1935         case CHIP_ARCTURUS:
1936                 device_queue_manager_init_v9(&dqm->asic_ops);
1937                 break;
1938         case CHIP_NAVI10:
1939         case CHIP_NAVI12:
1940         case CHIP_NAVI14:
1941         case CHIP_SIENNA_CICHLID:
1942         case CHIP_NAVY_FLOUNDER:
1943                 device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1944                 break;
1945         default:
1946                 WARN(1, "Unexpected ASIC family %u",
1947                      dev->device_info->asic_family);
1948                 goto out_free;
1949         }
1950
1951         if (init_mqd_managers(dqm))
1952                 goto out_free;
1953
1954         if (allocate_hiq_sdma_mqd(dqm)) {
1955                 pr_err("Failed to allocate hiq sdma mqd chunk buffer\n");
1956                 goto out_free;
1957         }
1958
1959         if (!dqm->ops.initialize(dqm))
1960                 return dqm;
1961
1962 out_free:
1963         kfree(dqm);
1964         return NULL;
1965 }
1966
1967 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1968                                     struct kfd_mem_obj *mqd)
1969 {
1970         WARN(!mqd, "No hiq sdma mqd chunk to free");
1971
1972         amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1973 }
1974
1975 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1976 {
1977         dqm->ops.uninitialize(dqm);
1978         deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
1979         kfree(dqm);
1980 }
1981
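/*
 * On a VM fault, look up the faulting process by PASID and evict its
 * queues on this device.
 */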
1982 int kfd_process_vm_fault(struct device_queue_manager *dqm,
1983                          unsigned int pasid)
1984 {
1985         struct kfd_process_device *pdd;
1986         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1987         int ret = 0;
1988
1989         if (!p)
1990                 return -EINVAL;
1991         pdd = kfd_get_process_device_data(dqm->dev, p);
1992         if (pdd)
1993                 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1994         kfd_unref_process(p);
1995
1996         return ret;
1997 }
1998
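/* HW exception worker: trigger a GPU reset for the device. */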
1999 static void kfd_process_hw_exception(struct work_struct *work)
2000 {
2001         struct device_queue_manager *dqm = container_of(work,
2002                         struct device_queue_manager, hw_exception_work);
2003         amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
2004 }
2005
2006 #if defined(CONFIG_DEBUG_FS)
2007
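/*
 * Print a register dump for debugfs: each line starts with the offset of
 * its first register followed by up to eight consecutive values; a
 * non-consecutive offset starts a new line.
 */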
2008 static void seq_reg_dump(struct seq_file *m,
2009                          uint32_t (*dump)[2], uint32_t n_regs)
2010 {
2011         uint32_t i, count;
2012
2013         for (i = 0, count = 0; i < n_regs; i++) {
2014                 if (count == 0 ||
2015                     dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2016                         seq_printf(m, "%s    %08x: %08x",
2017                                    i ? "\n" : "",
2018                                    dump[i][0], dump[i][1]);
2019                         count = 7;
2020                 } else {
2021                         seq_printf(m, " %08x", dump[i][1]);
2022                         count--;
2023                 }
2024         }
2025
2026         seq_puts(m, "\n");
2027 }
2028
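/*
 * Dump the HQD registers of the HIQ, of every CP queue reserved for KFD
 * and of every SDMA RLC queue.
 */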
2029 int dqm_debugfs_hqds(struct seq_file *m, void *data)
2030 {
2031         struct device_queue_manager *dqm = data;
2032         uint32_t (*dump)[2], n_regs;
2033         int pipe, queue;
2034         int r = 0;
2035
2036         if (!dqm->sched_running) {
2037                 seq_puts(m, " Device is stopped\n");
2038
2039                 return 0;
2040         }
2041
2042         r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
2043                                         KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2044                                         &dump, &n_regs);
2045         if (!r) {
2046                 seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
2047                            KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2048                            KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2049                            KFD_CIK_HIQ_QUEUE);
2050                 seq_reg_dump(m, dump, n_regs);
2051
2052                 kfree(dump);
2053         }
2054
2055         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2056                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
2057
2058                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2059                         if (!test_bit(pipe_offset + queue,
2060                                       dqm->dev->shared_resources.cp_queue_bitmap))
2061                                 continue;
2062
2063                         r = dqm->dev->kfd2kgd->hqd_dump(
2064                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2065                         if (r)
2066                                 break;
2067
2068                         seq_printf(m, "  CP Pipe %d, Queue %d\n",
2069                                   pipe, queue);
2070                         seq_reg_dump(m, dump, n_regs);
2071
2072                         kfree(dump);
2073                 }
2074         }
2075
2076         for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2077                 for (queue = 0;
2078                      queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2079                      queue++) {
2080                         r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2081                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2082                         if (r)
2083                                 break;
2084
2085                         seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2086                                   pipe, queue);
2087                         seq_reg_dump(m, dump, n_regs);
2088
2089                         kfree(dump);
2090                 }
2091         }
2092
2093         return r;
2094 }
2095
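/*
 * Debugfs hook: force active_runlist so that an unmap is issued, then
 * re-execute all queues to upload a fresh runlist.
 */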
2096 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
2097 {
2098         int r = 0;
2099
2100         dqm_lock(dqm);
2101         dqm->active_runlist = true;
2102         r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2103         dqm_unlock(dqm);
2104
2105         return r;
2106 }
2107
2108 #endif