1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/ratelimit.h>
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/list.h>
28 #include <linux/types.h>
29 #include <linux/bitops.h>
30 #include <linux/sched.h>
31 #include "kfd_priv.h"
32 #include "kfd_device_queue_manager.h"
33 #include "kfd_mqd_manager.h"
34 #include "cik_regs.h"
35 #include "kfd_kernel_queue.h"
36
37 /* Size of the per-pipe EOP queue */
38 #define CIK_HPD_EOP_BYTES_LOG2 11
39 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
40
41 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
42                                         unsigned int pasid, unsigned int vmid);
43
44 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
45                                         struct queue *q,
46                                         struct qcm_process_device *qpd);
47
48 static int execute_queues_cpsch(struct device_queue_manager *dqm,
49                                 enum kfd_unmap_queues_filter filter,
50                                 uint32_t filter_param);
51 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
52                                 enum kfd_unmap_queues_filter filter,
53                                 uint32_t filter_param);
54
55 static int map_queues_cpsch(struct device_queue_manager *dqm);
56
57 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
58                                         struct queue *q,
59                                         struct qcm_process_device *qpd);
60
61 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
62                                 unsigned int sdma_queue_id);
63
64 static void kfd_process_hw_exception(struct work_struct *work);
65
66 static inline
67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
68 {
69         if (type == KFD_QUEUE_TYPE_SDMA)
70                 return KFD_MQD_TYPE_SDMA;
71         return KFD_MQD_TYPE_CP;
72 }
73
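/*
 * A pipe is considered enabled for KFD if at least one of its queues is set
 * in the queue_bitmap that amdgpu shares with KFD.
 */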
74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
75 {
76         int i;
77         int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
78                 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
79
80         /* queue is available for KFD usage if bit is 1 */
81         for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
82                 if (test_bit(pipe_offset + i,
83                               dqm->dev->shared_resources.queue_bitmap))
84                         return true;
85         return false;
86 }
87
88 unsigned int get_queues_num(struct device_queue_manager *dqm)
89 {
90         return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
91                                 KGD_MAX_QUEUES);
92 }
93
94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
95 {
96         return dqm->dev->shared_resources.num_queue_per_pipe;
97 }
98
99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
100 {
101         return dqm->dev->shared_resources.num_pipe_per_mec;
102 }
103
104 static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
105 {
106         return dqm->dev->device_info->num_sdma_engines;
107 }
108
109 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
110 {
111         return dqm->dev->device_info->num_sdma_engines
112                         * KFD_SDMA_QUEUES_PER_ENGINE;
113 }
114
115 void program_sh_mem_settings(struct device_queue_manager *dqm,
116                                         struct qcm_process_device *qpd)
117 {
118         return dqm->dev->kfd2kgd->program_sh_mem_settings(
119                                                 dqm->dev->kgd, qpd->vmid,
120                                                 qpd->sh_mem_config,
121                                                 qpd->sh_mem_ape1_base,
122                                                 qpd->sh_mem_ape1_limit,
123                                                 qpd->sh_mem_bases);
124 }
125
126 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
127 {
128         struct kfd_dev *dev = qpd->dqm->dev;
129
130         if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
131                 /* On pre-SOC15 chips we need to use the queue ID to
132                  * preserve the user mode ABI.
133                  */
134                 q->doorbell_id = q->properties.queue_id;
135         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
136                 /* For SDMA queues on SOC15, use static doorbell
137                  * assignments based on the engine and queue.
138                  */
139                 q->doorbell_id = dev->shared_resources.sdma_doorbell
140                         [q->properties.sdma_engine_id]
141                         [q->properties.sdma_queue_id];
142         } else {
143                 /* For CP queues on SOC15 reserve a free doorbell ID */
144                 unsigned int found;
145
146                 found = find_first_zero_bit(qpd->doorbell_bitmap,
147                                             KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
148                 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
149                         pr_debug("No doorbells available\n");
150                         return -EBUSY;
151                 }
152                 set_bit(found, qpd->doorbell_bitmap);
153                 q->doorbell_id = found;
154         }
155
156         q->properties.doorbell_off =
157                 kfd_doorbell_id_to_offset(dev, q->process,
158                                           q->doorbell_id);
159
160         return 0;
161 }
162
163 static void deallocate_doorbell(struct qcm_process_device *qpd,
164                                 struct queue *q)
165 {
166         unsigned int old;
167         struct kfd_dev *dev = qpd->dqm->dev;
168
169         if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
170             q->properties.type == KFD_QUEUE_TYPE_SDMA)
171                 return;
172
173         old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
174         WARN_ON(!old);
175 }
176
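/*
 * No-HWS VMID allocation: claim the lowest free bit of dqm->vmid_bitmap with
 * ffs() and offset it by first_vmid_kfd (e.g. if first_vmid_kfd were 8, bit 0
 * would map to VMID 8).  The VMID stays bound to the process until its last
 * queue is destroyed and deallocate_vmid() returns the bit to the pool.
 */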
177 static int allocate_vmid(struct device_queue_manager *dqm,
178                         struct qcm_process_device *qpd,
179                         struct queue *q)
180 {
181         int bit, allocated_vmid;
182
183         if (dqm->vmid_bitmap == 0)
184                 return -ENOMEM;
185
186         bit = ffs(dqm->vmid_bitmap) - 1;
187         dqm->vmid_bitmap &= ~(1 << bit);
188
189         allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
190         pr_debug("vmid allocation %d\n", allocated_vmid);
191         qpd->vmid = allocated_vmid;
192         q->properties.vmid = allocated_vmid;
193
194         set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
195         program_sh_mem_settings(dqm, qpd);
196
197         /* qpd->page_table_base is set earlier when register_process()
198          * is called, i.e. when the first queue is created.
199          */
200         dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
201                         qpd->vmid,
202                         qpd->page_table_base);
203         /* invalidate the VM context after pasid and vmid mapping is set up */
204         kfd_flush_tlb(qpd_to_pdd(qpd));
205
206         return 0;
207 }
208
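/*
 * Flush the texture cache by writing a release_mem packet into the process's
 * pre-mapped IB buffer (qpd->ib_kaddr) and submitting it on MEC1 under the
 * process's VMID.  Needed on GFX7 (Hawaii), where the CP does not flush TC
 * when a queue is dequeued.
 */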
209 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
210                                 struct qcm_process_device *qpd)
211 {
212         const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
213         int ret;
214
215         if (!qpd->ib_kaddr)
216                 return -ENOMEM;
217
218         ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
219         if (ret)
220                 return ret;
221
222         return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
223                                 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
224                                 pmf->release_mem_size / sizeof(uint32_t));
225 }
226
227 static void deallocate_vmid(struct device_queue_manager *dqm,
228                                 struct qcm_process_device *qpd,
229                                 struct queue *q)
230 {
231         int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
232
233         /* On GFX v7, CP doesn't flush TC at dequeue */
234         if (q->device->device_info->asic_family == CHIP_HAWAII)
235                 if (flush_texture_cache_nocpsch(q->device, qpd))
236                         pr_err("Failed to flush TC\n");
237
238         kfd_flush_tlb(qpd_to_pdd(qpd));
239
240         /* Release the vmid mapping */
241         set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
242
243         dqm->vmid_bitmap |= (1 << bit);
244         qpd->vmid = 0;
245         q->properties.vmid = 0;
246 }
247
248 static int create_queue_nocpsch(struct device_queue_manager *dqm,
249                                 struct queue *q,
250                                 struct qcm_process_device *qpd)
251 {
252         int retval;
253
254         print_queue(q);
255
256         dqm_lock(dqm);
257
258         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
259                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
260                                 dqm->total_queue_count);
261                 retval = -EPERM;
262                 goto out_unlock;
263         }
264
265         if (list_empty(&qpd->queues_list)) {
266                 retval = allocate_vmid(dqm, qpd, q);
267                 if (retval)
268                         goto out_unlock;
269         }
270         q->properties.vmid = qpd->vmid;
271         /*
272          * Eviction state logic: we only mark active queues as evicted
273          * to avoid the overhead of restoring inactive queues later
274          */
275         if (qpd->evicted)
276                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
277                                             q->properties.queue_percent > 0 &&
278                                             q->properties.queue_address != 0);
279
280         q->properties.tba_addr = qpd->tba_addr;
281         q->properties.tma_addr = qpd->tma_addr;
282
283         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
284                 retval = create_compute_queue_nocpsch(dqm, q, qpd);
285         else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
286                 retval = create_sdma_queue_nocpsch(dqm, q, qpd);
287         else
288                 retval = -EINVAL;
289
290         if (retval) {
291                 if (list_empty(&qpd->queues_list))
292                         deallocate_vmid(dqm, qpd, q);
293                 goto out_unlock;
294         }
295
296         list_add(&q->list, &qpd->queues_list);
297         qpd->queue_count++;
298         if (q->properties.is_active)
299                 dqm->queue_count++;
300
301         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
302                 dqm->sdma_queue_count++;
303
304         /*
305          * Unconditionally increment this counter, regardless of the queue's
306          * type or whether the queue is active.
307          */
308         dqm->total_queue_count++;
309         pr_debug("Total of %d queues are accountable so far\n",
310                         dqm->total_queue_count);
311
312 out_unlock:
313         dqm_unlock(dqm);
314         return retval;
315 }
316
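/*
 * Pick an HQD slot for a no-HWS compute queue: starting at
 * next_pipe_to_allocate, walk the enabled pipes round-robin and grab the
 * lowest free queue bit of the first pipe that has one, so queues spread
 * "horizontally" across pipes instead of stacking up on a single pipe.
 */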
317 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
318 {
319         bool set;
320         int pipe, bit, i;
321
322         set = false;
323
324         for (pipe = dqm->next_pipe_to_allocate, i = 0;
325                         i < get_pipes_per_mec(dqm);
326                         pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
327
328                 if (!is_pipe_enabled(dqm, 0, pipe))
329                         continue;
330
331                 if (dqm->allocated_queues[pipe] != 0) {
332                         bit = ffs(dqm->allocated_queues[pipe]) - 1;
333                         dqm->allocated_queues[pipe] &= ~(1 << bit);
334                         q->pipe = pipe;
335                         q->queue = bit;
336                         set = true;
337                         break;
338                 }
339         }
340
341         if (!set)
342                 return -EBUSY;
343
344         pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
345         /* horizontal hqd allocation */
346         dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
347
348         return 0;
349 }
350
351 static inline void deallocate_hqd(struct device_queue_manager *dqm,
352                                 struct queue *q)
353 {
354         dqm->allocated_queues[q->pipe] |= (1 << q->queue);
355 }
356
357 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
358                                         struct queue *q,
359                                         struct qcm_process_device *qpd)
360 {
361         int retval;
362         struct mqd_manager *mqd_mgr;
363
364         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
365         if (!mqd_mgr)
366                 return -ENOMEM;
367
368         retval = allocate_hqd(dqm, q);
369         if (retval)
370                 return retval;
371
372         retval = allocate_doorbell(qpd, q);
373         if (retval)
374                 goto out_deallocate_hqd;
375
376         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
377                                 &q->gart_mqd_addr, &q->properties);
378         if (retval)
379                 goto out_deallocate_doorbell;
380
381         pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
382                         q->pipe, q->queue);
383
384         dqm->dev->kfd2kgd->set_scratch_backing_va(
385                         dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
386
387         if (!q->properties.is_active)
388                 return 0;
389
390         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
391                         &q->properties, q->process->mm);
392         if (retval)
393                 goto out_uninit_mqd;
394
395         return 0;
396
397 out_uninit_mqd:
398         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
399 out_deallocate_doorbell:
400         deallocate_doorbell(qpd, q);
401 out_deallocate_hqd:
402         deallocate_hqd(dqm, q);
403
404         return retval;
405 }
406
407 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
408  * to avoid unsynchronized access
409  */
410 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
411                                 struct qcm_process_device *qpd,
412                                 struct queue *q)
413 {
414         int retval;
415         struct mqd_manager *mqd_mgr;
416
417         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
418                 get_mqd_type_from_queue_type(q->properties.type));
419         if (!mqd_mgr)
420                 return -ENOMEM;
421
422         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
423                 deallocate_hqd(dqm, q);
424         } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
425                 dqm->sdma_queue_count--;
426                 deallocate_sdma_queue(dqm, q->sdma_id);
427         } else {
428                 pr_debug("q->properties.type %d is invalid\n",
429                                 q->properties.type);
430                 return -EINVAL;
431         }
432         dqm->total_queue_count--;
433
434         deallocate_doorbell(qpd, q);
435
436         retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
437                                 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
438                                 KFD_UNMAP_LATENCY_MS,
439                                 q->pipe, q->queue);
440         if (retval == -ETIME)
441                 qpd->reset_wavefronts = true;
442
443         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
444
445         list_del(&q->list);
446         if (list_empty(&qpd->queues_list)) {
447                 if (qpd->reset_wavefronts) {
448                         pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
449                                         dqm->dev);
450                         /* dbgdev_wave_reset_wavefronts has to be called before
451                          * deallocate_vmid(), i.e. when vmid is still in use.
452                          */
453                         dbgdev_wave_reset_wavefronts(dqm->dev,
454                                         qpd->pqm->process);
455                         qpd->reset_wavefronts = false;
456                 }
457
458                 deallocate_vmid(dqm, qpd, q);
459         }
460         qpd->queue_count--;
461         if (q->properties.is_active)
462                 dqm->queue_count--;
463
464         return retval;
465 }
466
467 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
468                                 struct qcm_process_device *qpd,
469                                 struct queue *q)
470 {
471         int retval;
472
473         dqm_lock(dqm);
474         retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
475         dqm_unlock(dqm);
476
477         return retval;
478 }
479
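/*
 * A queue's MQD may only be rewritten while the queue is off the hardware:
 * with a HW scheduler the dynamic queues are unmapped first and the runlist
 * re-uploaded afterwards; without one the HQD is drained and the MQD
 * reloaded directly once it has been updated.
 */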
480 static int update_queue(struct device_queue_manager *dqm, struct queue *q)
481 {
482         int retval;
483         struct mqd_manager *mqd_mgr;
484         struct kfd_process_device *pdd;
485         bool prev_active = false;
486
487         dqm_lock(dqm);
488         pdd = kfd_get_process_device_data(q->device, q->process);
489         if (!pdd) {
490                 retval = -ENODEV;
491                 goto out_unlock;
492         }
493         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
494                         get_mqd_type_from_queue_type(q->properties.type));
495         if (!mqd_mgr) {
496                 retval = -ENOMEM;
497                 goto out_unlock;
498         }
499         /*
500          * Eviction state logic: we only mark active queues as evicted
501          * to avoid the overhead of restoring inactive queues later
502          */
503         if (pdd->qpd.evicted)
504                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
505                                             q->properties.queue_percent > 0 &&
506                                             q->properties.queue_address != 0);
507
508         /* Save previous activity state for counters */
509         prev_active = q->properties.is_active;
510
511         /* Make sure the queue is unmapped before updating the MQD */
512         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
513                 retval = unmap_queues_cpsch(dqm,
514                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
515                 if (retval) {
516                         pr_err("unmap queue failed\n");
517                         goto out_unlock;
518                 }
519         } else if (prev_active &&
520                    (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
521                     q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
522                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
523                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
524                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
525                 if (retval) {
526                         pr_err("destroy mqd failed\n");
527                         goto out_unlock;
528                 }
529         }
530
531         retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
532
533         /*
534          * Check the new active state against the previous one and
535          * adjust the counter accordingly; map_queues_cpsch() uses
536          * dqm->queue_count to determine whether a new runlist must be
537          * uploaded.
538          */
539         if (q->properties.is_active && !prev_active)
540                 dqm->queue_count++;
541         else if (!q->properties.is_active && prev_active)
542                 dqm->queue_count--;
543
544         if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
545                 retval = map_queues_cpsch(dqm);
546         else if (q->properties.is_active &&
547                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
548                   q->properties.type == KFD_QUEUE_TYPE_SDMA))
549                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
550                                        &q->properties, q->process->mm);
551
552 out_unlock:
553         dqm_unlock(dqm);
554         return retval;
555 }
556
557 static struct mqd_manager *get_mqd_manager(
558                 struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
559 {
560         struct mqd_manager *mqd_mgr;
561
562         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
563                 return NULL;
564
565         pr_debug("mqd type %d\n", type);
566
567         mqd_mgr = dqm->mqd_mgrs[type];
568         if (!mqd_mgr) {
569                 mqd_mgr = mqd_manager_init(type, dqm->dev);
570                 if (!mqd_mgr)
571                         pr_err("mqd manager is NULL\n");
572                 dqm->mqd_mgrs[type] = mqd_mgr;
573         }
574
575         return mqd_mgr;
576 }
577
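/*
 * Queue eviction is reference counted per process via qpd->evicted, so
 * nested evict/restore calls pair up correctly.  The no-HWS variant below
 * drains each active queue's HQD individually; the HWS variant only flips
 * the queue state and lets a single runlist update do the work.
 */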
578 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
579                                         struct qcm_process_device *qpd)
580 {
581         struct queue *q;
582         struct mqd_manager *mqd_mgr;
583         struct kfd_process_device *pdd;
584         int retval = 0;
585
586         dqm_lock(dqm);
587         if (qpd->evicted++ > 0) /* already evicted, do nothing */
588                 goto out;
589
590         pdd = qpd_to_pdd(qpd);
591         pr_info_ratelimited("Evicting PASID %u queues\n",
592                             pdd->process->pasid);
593
594         /* deactivate all active queues on the qpd */
595         list_for_each_entry(q, &qpd->queues_list, list) {
596                 if (!q->properties.is_active)
597                         continue;
598                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
599                         get_mqd_type_from_queue_type(q->properties.type));
600                 if (!mqd_mgr) { /* should not be here */
601                         pr_err("Cannot evict queue, mqd mgr is NULL\n");
602                         retval = -ENOMEM;
603                         goto out;
604                 }
605                 q->properties.is_evicted = true;
606                 q->properties.is_active = false;
607                 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
608                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
609                                 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
610                 if (retval)
611                         goto out;
612                 dqm->queue_count--;
613         }
614
615 out:
616         dqm_unlock(dqm);
617         return retval;
618 }
619
620 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
621                                       struct qcm_process_device *qpd)
622 {
623         struct queue *q;
624         struct kfd_process_device *pdd;
625         int retval = 0;
626
627         dqm_lock(dqm);
628         if (qpd->evicted++ > 0) /* already evicted, do nothing */
629                 goto out;
630
631         pdd = qpd_to_pdd(qpd);
632         pr_info_ratelimited("Evicting PASID %u queues\n",
633                             pdd->process->pasid);
634
635         /* deactivate all active queues on the qpd */
636         list_for_each_entry(q, &qpd->queues_list, list) {
637                 if (!q->properties.is_active)
638                         continue;
639                 q->properties.is_evicted = true;
640                 q->properties.is_active = false;
641                 dqm->queue_count--;
642         }
643         retval = execute_queues_cpsch(dqm,
644                                 qpd->is_debug ?
645                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
646                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
647
648 out:
649         dqm_unlock(dqm);
650         return retval;
651 }
652
653 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
654                                           struct qcm_process_device *qpd)
655 {
656         struct queue *q;
657         struct mqd_manager *mqd_mgr;
658         struct kfd_process_device *pdd;
659         uint32_t pd_base;
660         int retval = 0;
661
662         pdd = qpd_to_pdd(qpd);
663         /* Retrieve PD base */
664         pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
665
666         dqm_lock(dqm);
667         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
668                 goto out;
669         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
670                 qpd->evicted--;
671                 goto out;
672         }
673
674         pr_info_ratelimited("Restoring PASID %u queues\n",
675                             pdd->process->pasid);
676
677         /* Update PD Base in QPD */
678         qpd->page_table_base = pd_base;
679         pr_debug("Updated PD address to 0x%08x\n", pd_base);
680
681         if (!list_empty(&qpd->queues_list)) {
682                 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
683                                 dqm->dev->kgd,
684                                 qpd->vmid,
685                                 qpd->page_table_base);
686                 kfd_flush_tlb(pdd);
687         }
688
689         /* re-activate all evicted queues on the qpd */
690         list_for_each_entry(q, &qpd->queues_list, list) {
691                 if (!q->properties.is_evicted)
692                         continue;
693                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
694                         get_mqd_type_from_queue_type(q->properties.type));
695                 if (!mqd_mgr) { /* should not be here */
696                         pr_err("Cannot restore queue, mqd mgr is NULL\n");
697                         retval = -ENOMEM;
698                         goto out;
699                 }
700                 q->properties.is_evicted = false;
701                 q->properties.is_active = true;
702                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
703                                        q->queue, &q->properties,
704                                        q->process->mm);
705                 if (retval)
706                         goto out;
707                 dqm->queue_count++;
708         }
709         qpd->evicted = 0;
710 out:
711         dqm_unlock(dqm);
712         return retval;
713 }
714
715 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
716                                         struct qcm_process_device *qpd)
717 {
718         struct queue *q;
719         struct kfd_process_device *pdd;
720         uint32_t pd_base;
721         int retval = 0;
722
723         pdd = qpd_to_pdd(qpd);
724         /* Retrieve PD base */
725         pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
726
727         dqm_lock(dqm);
728         if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
729                 goto out;
730         if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
731                 qpd->evicted--;
732                 goto out;
733         }
734
735         pr_info_ratelimited("Restoring PASID %u queues\n",
736                             pdd->process->pasid);
737
738         /* Update PD Base in QPD */
739         qpd->page_table_base = pd_base;
740         pr_debug("Updated PD address to 0x%08x\n", pd_base);
741
742         /* re-activate all evicted queues on the qpd */
743         list_for_each_entry(q, &qpd->queues_list, list) {
744                 if (!q->properties.is_evicted)
745                         continue;
746                 q->properties.is_evicted = false;
747                 q->properties.is_active = true;
748                 dqm->queue_count++;
749         }
750         retval = execute_queues_cpsch(dqm,
751                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
752         if (!retval)
753                 qpd->evicted = 0;
754 out:
755         dqm_unlock(dqm);
756         return retval;
757 }
758
759 static int register_process(struct device_queue_manager *dqm,
760                                         struct qcm_process_device *qpd)
761 {
762         struct device_process_node *n;
763         struct kfd_process_device *pdd;
764         uint32_t pd_base;
765         int retval;
766
767         n = kzalloc(sizeof(*n), GFP_KERNEL);
768         if (!n)
769                 return -ENOMEM;
770
771         n->qpd = qpd;
772
773         pdd = qpd_to_pdd(qpd);
774         /* Retrieve PD base */
775         pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
776
777         dqm_lock(dqm);
778         list_add(&n->list, &dqm->queues);
779
780         /* Update PD Base in QPD */
781         qpd->page_table_base = pd_base;
782
783         retval = dqm->asic_ops.update_qpd(dqm, qpd);
784
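        /*
         * Tell the GPU driver that compute is no longer idle when the first
         * process registers; unregister_process() marks it idle again once
         * the last process has gone.
         */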
785         if (dqm->processes_count++ == 0)
786                 dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);
787
788         dqm_unlock(dqm);
789
790         return retval;
791 }
792
793 static int unregister_process(struct device_queue_manager *dqm,
794                                         struct qcm_process_device *qpd)
795 {
796         int retval;
797         struct device_process_node *cur, *next;
798
799         pr_debug("qpd->queues_list is %s\n",
800                         list_empty(&qpd->queues_list) ? "empty" : "not empty");
801
802         retval = 0;
803         dqm_lock(dqm);
804
805         list_for_each_entry_safe(cur, next, &dqm->queues, list) {
806                 if (qpd == cur->qpd) {
807                         list_del(&cur->list);
808                         kfree(cur);
809                         if (--dqm->processes_count == 0)
810                                 dqm->dev->kfd2kgd->set_compute_idle(
811                                         dqm->dev->kgd, true);
812                         goto out;
813                 }
814         }
815         /* qpd not found in dqm list */
816         retval = 1;
817 out:
818         dqm_unlock(dqm);
819         return retval;
820 }
821
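/*
 * Program the ATC PASID<->VMID mapping for @vmid: the PASID occupies the low
 * bits together with a VALID flag, and a PASID of 0 writes an all-zero
 * (invalid) mapping, which is how deallocate_vmid() releases the binding.
 */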
822 static int
823 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
824                         unsigned int vmid)
825 {
826         uint32_t pasid_mapping;
827
828         pasid_mapping = (pasid == 0) ? 0 :
829                 (uint32_t)pasid |
830                 ATC_VMID_PASID_MAPPING_VALID;
831
832         return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
833                                                 dqm->dev->kgd, pasid_mapping,
834                                                 vmid);
835 }
836
837 static void init_interrupts(struct device_queue_manager *dqm)
838 {
839         unsigned int i;
840
841         for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
842                 if (is_pipe_enabled(dqm, 0, i))
843                         dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
844 }
845
846 static int initialize_nocpsch(struct device_queue_manager *dqm)
847 {
848         int pipe, queue;
849
850         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
851
852         dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
853                                         sizeof(unsigned int), GFP_KERNEL);
854         if (!dqm->allocated_queues)
855                 return -ENOMEM;
856
857         mutex_init(&dqm->lock_hidden);
858         INIT_LIST_HEAD(&dqm->queues);
859         dqm->queue_count = dqm->next_pipe_to_allocate = 0;
860         dqm->sdma_queue_count = 0;
861
862         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
863                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
864
865                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
866                         if (test_bit(pipe_offset + queue,
867                                      dqm->dev->shared_resources.queue_bitmap))
868                                 dqm->allocated_queues[pipe] |= 1 << queue;
869         }
870
871         dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
872         dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
873
874         return 0;
875 }
876
877 static void uninitialize(struct device_queue_manager *dqm)
878 {
879         int i;
880
881         WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
882
883         kfree(dqm->allocated_queues);
884         for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
885                 kfree(dqm->mqd_mgrs[i]);
886         mutex_destroy(&dqm->lock_hidden);
887         kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
888 }
889
890 static int start_nocpsch(struct device_queue_manager *dqm)
891 {
892         init_interrupts(dqm);
893         return pm_init(&dqm->packets, dqm);
894 }
895
896 static int stop_nocpsch(struct device_queue_manager *dqm)
897 {
898         pm_uninit(&dqm->packets);
899         return 0;
900 }
901
902 static int allocate_sdma_queue(struct device_queue_manager *dqm,
903                                 unsigned int *sdma_queue_id)
904 {
905         int bit;
906
907         if (dqm->sdma_bitmap == 0)
908                 return -ENOMEM;
909
910         bit = ffs(dqm->sdma_bitmap) - 1;
911         dqm->sdma_bitmap &= ~(1 << bit);
912         *sdma_queue_id = bit;
913
914         return 0;
915 }
916
917 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
918                                 unsigned int sdma_queue_id)
919 {
920         if (sdma_queue_id >= get_num_sdma_queues(dqm))
921                 return;
922         dqm->sdma_bitmap |= (1 << sdma_queue_id);
923 }
924
925 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
926                                         struct queue *q,
927                                         struct qcm_process_device *qpd)
928 {
929         struct mqd_manager *mqd_mgr;
930         int retval;
931
932         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
933         if (!mqd_mgr)
934                 return -ENOMEM;
935
936         retval = allocate_sdma_queue(dqm, &q->sdma_id);
937         if (retval)
938                 return retval;
939
940         q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
941         q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
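        /*
         * The flat sdma_id is spread across engines round-robin; e.g. with
         * two SDMA engines, sdma_id 5 becomes queue 2 on engine 1.
         */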
942
943         retval = allocate_doorbell(qpd, q);
944         if (retval)
945                 goto out_deallocate_sdma_queue;
946
947         pr_debug("SDMA id is:    %d\n", q->sdma_id);
948         pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
949         pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
950
951         dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
952         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
953                                 &q->gart_mqd_addr, &q->properties);
954         if (retval)
955                 goto out_deallocate_doorbell;
956
957         retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
958                                 NULL);
959         if (retval)
960                 goto out_uninit_mqd;
961
962         return 0;
963
964 out_uninit_mqd:
965         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
966 out_deallocate_doorbell:
967         deallocate_doorbell(qpd, q);
968 out_deallocate_sdma_queue:
969         deallocate_sdma_queue(dqm, q->sdma_id);
970
971         return retval;
972 }
973
974 /*
975  * Device Queue Manager implementation for cp scheduler
976  */
977
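/*
 * Describe KFD's share of the hardware to the HW scheduler firmware: the
 * compute VMID mask and a 64-bit mask of the MEC1 queues KFD owns, handed
 * over through pm_send_set_resources().
 */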
978 static int set_sched_resources(struct device_queue_manager *dqm)
979 {
980         int i, mec;
981         struct scheduling_resources res;
982
983         res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
984
985         res.queue_mask = 0;
986         for (i = 0; i < KGD_MAX_QUEUES; ++i) {
987                 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
988                         / dqm->dev->shared_resources.num_pipe_per_mec;
989
990                 if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
991                         continue;
992
993                 /* only acquire queues from the first MEC */
994                 if (mec > 0)
995                         continue;
996
997                 /* This situation may be hit in the future if a new HW
998                  * generation exposes more than 64 queues. If so, the
999                  * definition of res.queue_mask needs updating
1000                  */
1001                 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1002                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1003                         break;
1004                 }
1005
1006                 res.queue_mask |= (1ull << i);
1007         }
1008         res.gws_mask = res.oac_mask = res.gds_heap_base =
1009                                                 res.gds_heap_size = 0;
1010
1011         pr_debug("Scheduling resources:\n"
1012                         "vmid mask: 0x%8X\n"
1013                         "queue mask: 0x%8llX\n",
1014                         res.vmid_mask, res.queue_mask);
1015
1016         return pm_send_set_resources(&dqm->packets, &res);
1017 }
1018
1019 static int initialize_cpsch(struct device_queue_manager *dqm)
1020 {
1021         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1022
1023         mutex_init(&dqm->lock_hidden);
1024         INIT_LIST_HEAD(&dqm->queues);
1025         dqm->queue_count = dqm->processes_count = 0;
1026         dqm->sdma_queue_count = 0;
1027         dqm->active_runlist = false;
1028         dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
1029
1030         INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1031
1032         return 0;
1033 }
1034
1035 static int start_cpsch(struct device_queue_manager *dqm)
1036 {
1037         int retval;
1038
1039         retval = 0;
1040
1041         retval = pm_init(&dqm->packets, dqm);
1042         if (retval)
1043                 goto fail_packet_manager_init;
1044
1045         retval = set_sched_resources(dqm);
1046         if (retval)
1047                 goto fail_set_sched_resources;
1048
1049         pr_debug("Allocating fence memory\n");
1050
1051         /* allocate fence memory on the gart */
1052         retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1053                                         &dqm->fence_mem);
1054
1055         if (retval)
1056                 goto fail_allocate_vidmem;
1057
1058         dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1059         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1060
1061         init_interrupts(dqm);
1062
1063         dqm_lock(dqm);
1064         /* clear hang status when the driver tries to start the hw scheduler */
1065         dqm->is_hws_hang = false;
1066         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1067         dqm_unlock(dqm);
1068
1069         return 0;
1070 fail_allocate_vidmem:
1071 fail_set_sched_resources:
1072         pm_uninit(&dqm->packets);
1073 fail_packet_manager_init:
1074         return retval;
1075 }
1076
1077 static int stop_cpsch(struct device_queue_manager *dqm)
1078 {
1079         dqm_lock(dqm);
1080         unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1081         dqm_unlock(dqm);
1082
1083         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1084         pm_uninit(&dqm->packets);
1085
1086         return 0;
1087 }
1088
1089 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1090                                         struct kernel_queue *kq,
1091                                         struct qcm_process_device *qpd)
1092 {
1093         dqm_lock(dqm);
1094         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1095                 pr_warn("Can't create new kernel queue because %d queues were already created\n",
1096                                 dqm->total_queue_count);
1097                 dqm_unlock(dqm);
1098                 return -EPERM;
1099         }
1100
1101         /*
1102          * Unconditionally increment this counter, regardless of the queue's
1103          * type or whether the queue is active.
1104          */
1105         dqm->total_queue_count++;
1106         pr_debug("Total of %d queues are accountable so far\n",
1107                         dqm->total_queue_count);
1108
1109         list_add(&kq->list, &qpd->priv_queue_list);
1110         dqm->queue_count++;
1111         qpd->is_debug = true;
1112         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1113         dqm_unlock(dqm);
1114
1115         return 0;
1116 }
1117
1118 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1119                                         struct kernel_queue *kq,
1120                                         struct qcm_process_device *qpd)
1121 {
1122         dqm_lock(dqm);
1123         list_del(&kq->list);
1124         dqm->queue_count--;
1125         qpd->is_debug = false;
1126         execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1127         /*
1128          * Unconditionally decrement this counter, regardless of the queue's
1129          * type.
1130          */
1131         dqm->total_queue_count--;
1132         pr_debug("Total of %d queues are accountable so far\n",
1133                         dqm->total_queue_count);
1134         dqm_unlock(dqm);
1135 }
1136
1137 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1138                         struct qcm_process_device *qpd)
1139 {
1140         int retval;
1141         struct mqd_manager *mqd_mgr;
1142
1143         retval = 0;
1144
1145         dqm_lock(dqm);
1146
1147         if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1148                 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1149                                 dqm->total_queue_count);
1150                 retval = -EPERM;
1151                 goto out_unlock;
1152         }
1153
1154         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1155                 retval = allocate_sdma_queue(dqm, &q->sdma_id);
1156                 if (retval)
1157                         goto out_unlock;
1158                 q->properties.sdma_queue_id =
1159                         q->sdma_id / get_num_sdma_engines(dqm);
1160                 q->properties.sdma_engine_id =
1161                         q->sdma_id % get_num_sdma_engines(dqm);
1162         }
1163
1164         retval = allocate_doorbell(qpd, q);
1165         if (retval)
1166                 goto out_deallocate_sdma_queue;
1167
1168         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1169                         get_mqd_type_from_queue_type(q->properties.type));
1170
1171         if (!mqd_mgr) {
1172                 retval = -ENOMEM;
1173                 goto out_deallocate_doorbell;
1174         }
1175         /*
1176          * Eviction state logic: we only mark active queues as evicted
1177          * to avoid the overhead of restoring inactive queues later
1178          */
1179         if (qpd->evicted)
1180                 q->properties.is_evicted = (q->properties.queue_size > 0 &&
1181                                             q->properties.queue_percent > 0 &&
1182                                             q->properties.queue_address != 0);
1183
1184         dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1185
1186         q->properties.tba_addr = qpd->tba_addr;
1187         q->properties.tma_addr = qpd->tma_addr;
1188         retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
1189                                 &q->gart_mqd_addr, &q->properties);
1190         if (retval)
1191                 goto out_deallocate_doorbell;
1192
1193         list_add(&q->list, &qpd->queues_list);
1194         qpd->queue_count++;
1195         if (q->properties.is_active) {
1196                 dqm->queue_count++;
1197                 retval = execute_queues_cpsch(dqm,
1198                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1199         }
1200
1201         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1202                 dqm->sdma_queue_count++;
1203         /*
1204          * Unconditionally increment this counter, regardless of the queue's
1205          * type or whether the queue is active.
1206          */
1207         dqm->total_queue_count++;
1208
1209         pr_debug("Total of %d queues are accountable so far\n",
1210                         dqm->total_queue_count);
1211
1212         dqm_unlock(dqm);
1213         return retval;
1214
1215 out_deallocate_doorbell:
1216         deallocate_doorbell(qpd, q);
1217 out_deallocate_sdma_queue:
1218         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1219                 deallocate_sdma_queue(dqm, q->sdma_id);
1220 out_unlock:
1221         dqm_unlock(dqm);
1222
1223         return retval;
1224 }
1225
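/*
 * Poll a fence word that the CP writes back to GART memory, yielding with
 * schedule() between reads, until it equals fence_value or timeout_ms
 * expires (-ETIME).  With the halt_if_hws_hang option set, the thread spins
 * here forever on timeout so the CP state is preserved for FW debugging.
 */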
1226 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1227                                 unsigned int fence_value,
1228                                 unsigned int timeout_ms)
1229 {
1230         unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1231
1232         while (*fence_addr != fence_value) {
1233                 if (time_after(jiffies, end_jiffies)) {
1234                         pr_err("qcm fence wait loop timeout expired\n");
1235                         /* In HWS case, this is used to halt the driver thread
1236                          * in order not to mess up CP states before doing
1237                          * scandumps for FW debugging.
1238                          */
1239                         while (halt_if_hws_hang)
1240                                 schedule();
1241
1242                         return -ETIME;
1243                 }
1244                 schedule();
1245         }
1246
1247         return 0;
1248 }
1249
1250 static int unmap_sdma_queues(struct device_queue_manager *dqm,
1251                                 unsigned int sdma_engine)
1252 {
1253         return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1254                         KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
1255                         sdma_engine);
1256 }
1257
1258 /* dqm->lock mutex has to be locked before calling this function */
1259 static int map_queues_cpsch(struct device_queue_manager *dqm)
1260 {
1261         int retval;
1262
1263         if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1264                 return 0;
1265
1266         if (dqm->active_runlist)
1267                 return 0;
1268
1269         retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1270         if (retval) {
1271                 pr_err("failed to execute runlist\n");
1272                 return retval;
1273         }
1274         dqm->active_runlist = true;
1275
1276         return retval;
1277 }
1278
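/*
 * Preempt all mapped queues off the hardware: unmap the SDMA queues on both
 * engines, send the unmap packet for the compute queues, then wait for the
 * CP to write KFD_FENCE_COMPLETED to the GART fence before releasing the
 * runlist IB.  A timeout here is treated as a HWS hang by the caller.
 */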
1279 /* dqm->lock mutex has to be locked before calling this function */
1280 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1281                                 enum kfd_unmap_queues_filter filter,
1282                                 uint32_t filter_param)
1283 {
1284         int retval = 0;
1285
1286         if (dqm->is_hws_hang)
1287                 return -EIO;
1288         if (!dqm->active_runlist)
1289                 return retval;
1290
1291         pr_debug("Before destroying queues, sdma queue count is: %u\n",
1292                 dqm->sdma_queue_count);
1293
1294         if (dqm->sdma_queue_count > 0) {
1295                 unmap_sdma_queues(dqm, 0);
1296                 unmap_sdma_queues(dqm, 1);
1297         }
1298
1299         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1300                         filter, filter_param, false, 0);
1301         if (retval)
1302                 return retval;
1303
1304         *dqm->fence_addr = KFD_FENCE_INIT;
1305         pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1306                                 KFD_FENCE_COMPLETED);
1307         /* wait for the unmap to complete; time out if the CP never signals */
1308         retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1309                                 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
1310         if (retval)
1311                 return retval;
1312
1313         pm_release_ib(&dqm->packets);
1314         dqm->active_runlist = false;
1315
1316         return retval;
1317 }
1318
1319 /* dqm->lock mutex has to be locked before calling this function */
1320 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1321                                 enum kfd_unmap_queues_filter filter,
1322                                 uint32_t filter_param)
1323 {
1324         int retval;
1325
1326         if (dqm->is_hws_hang)
1327                 return -EIO;
1328         retval = unmap_queues_cpsch(dqm, filter, filter_param);
1329         if (retval) {
1330                 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1331                 dqm->is_hws_hang = true;
1332                 schedule_work(&dqm->hw_exception_work);
1333                 return retval;
1334         }
1335
1336         return map_queues_cpsch(dqm);
1337 }
1338
1339 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1340                                 struct qcm_process_device *qpd,
1341                                 struct queue *q)
1342 {
1343         int retval;
1344         struct mqd_manager *mqd_mgr;
1345         bool preempt_all_queues;
1346
1347         preempt_all_queues = false;
1348
1349         retval = 0;
1350
1351         /* remove queue from list to prevent rescheduling after preemption */
1352         dqm_lock(dqm);
1353
1354         if (qpd->is_debug) {
1355                 /*
1356                  * error: destroying a queue of a process that is
1357                  * currently being debugged is not allowed
1358                  */
1359                 retval = -EBUSY;
1360                 goto failed_try_destroy_debugged_queue;
1361
1362         }
1363
1364         mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1365                         get_mqd_type_from_queue_type(q->properties.type));
1366         if (!mqd_mgr) {
1367                 retval = -ENOMEM;
1368                 goto failed;
1369         }
1370
1371         deallocate_doorbell(qpd, q);
1372
1373         if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1374                 dqm->sdma_queue_count--;
1375                 deallocate_sdma_queue(dqm, q->sdma_id);
1376         }
1377
1378         list_del(&q->list);
1379         qpd->queue_count--;
1380         if (q->properties.is_active) {
1381                 dqm->queue_count--;
1382                 retval = execute_queues_cpsch(dqm,
1383                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1384                 if (retval == -ETIME)
1385                         qpd->reset_wavefronts = true;
1386         }
1387
1388         mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1389
1390         /*
1391          * Unconditionally decrement this counter, regardless of the queue's
1392          * type
1393          */
1394         dqm->total_queue_count--;
1395         pr_debug("Total of %d queues are accountable so far\n",
1396                         dqm->total_queue_count);
1397
1398         dqm_unlock(dqm);
1399
1400         return retval;
1401
1402 failed:
1403 failed_try_destroy_debugged_queue:
1404
1405         dqm_unlock(dqm);
1406         return retval;
1407 }
1408
1409 /*
1410  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1411  * stay in user mode.
1412  */
1413 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1414 /* APE1 limit is inclusive and 64K aligned. */
1415 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1416
1417 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1418                                    struct qcm_process_device *qpd,
1419                                    enum cache_policy default_policy,
1420                                    enum cache_policy alternate_policy,
1421                                    void __user *alternate_aperture_base,
1422                                    uint64_t alternate_aperture_size)
1423 {
1424         bool retval = true;
1425
1426         if (!dqm->asic_ops.set_cache_memory_policy)
1427                 return retval;
1428
1429         dqm_lock(dqm);
1430
1431         if (alternate_aperture_size == 0) {
1432                 /* base > limit disables APE1 */
1433                 qpd->sh_mem_ape1_base = 1;
1434                 qpd->sh_mem_ape1_limit = 0;
1435         } else {
1436                 /*
1437                  * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1438                  *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
1439                  * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1440                  *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1441                  * Verify that the base and size parameters can be
1442                  * represented in this format and convert them.
1443                  * Additionally restrict APE1 to user-mode addresses.
1444                  */
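                /*
                 * For example (made-up values): base 0x100000000 with size
                 * 0x20000 gives limit 0x10001ffff; both pass the
                 * APE1_FIXED_BITS_MASK checks below and are stored as
                 * sh_mem_ape1_base = 0x10000, sh_mem_ape1_limit = 0x10001.
                 */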
1445
1446                 uint64_t base = (uintptr_t)alternate_aperture_base;
1447                 uint64_t limit = base + alternate_aperture_size - 1;
1448
1449                 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1450                    (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1451                         retval = false;
1452                         goto out;
1453                 }
1454
1455                 qpd->sh_mem_ape1_base = base >> 16;
1456                 qpd->sh_mem_ape1_limit = limit >> 16;
1457         }
1458
1459         retval = dqm->asic_ops.set_cache_memory_policy(
1460                         dqm,
1461                         qpd,
1462                         default_policy,
1463                         alternate_policy,
1464                         alternate_aperture_base,
1465                         alternate_aperture_size);
1466
1467         if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1468                 program_sh_mem_settings(dqm, qpd);
1469
1470         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1471                 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1472                 qpd->sh_mem_ape1_limit);
1473
1474 out:
1475         dqm_unlock(dqm);
1476         return retval;
1477 }
1478
1479 static int set_trap_handler(struct device_queue_manager *dqm,
1480                                 struct qcm_process_device *qpd,
1481                                 uint64_t tba_addr,
1482                                 uint64_t tma_addr)
1483 {
1484         uint64_t *tma;
1485
1486         if (dqm->dev->cwsr_enabled) {
1487                 /* Jump from CWSR trap handler to user trap */
1488                 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1489                 tma[0] = tba_addr;
1490                 tma[1] = tma_addr;
1491         } else {
1492                 qpd->tba_addr = tba_addr;
1493                 qpd->tma_addr = tma_addr;
1494         }
1495
1496         return 0;
1497 }
1498
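     /*
      * Tear down a terminating process on the non-HWS path: destroy each of
      * its user mode queues and drop its registration from the DQM process
      * list.
      */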
1499 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1500                 struct qcm_process_device *qpd)
1501 {
1502         struct queue *q, *next;
1503         struct device_process_node *cur, *next_dpn;
1504         int retval = 0;
1505
1506         dqm_lock(dqm);
1507
1508         /* Clear all user mode queues */
1509         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1510                 int ret;
1511
1512                 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1513                 if (ret)
1514                         retval = ret;
1515         }
1516
1517         /* Unregister process */
1518         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1519                 if (qpd == cur->qpd) {
1520                         list_del(&cur->list);
1521                         kfree(cur);
1522                         dqm->processes_count--;
1523                         break;
1524                 }
1525         }
1526
1527         dqm_unlock(dqm);
1528         return retval;
1529 }
1530
1531
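     /*
      * Tear down a terminating process on the HWS path: remove its kernel
      * (debug) queues and user mode queues from the scheduler's bookkeeping,
      * unregister the process, re-run the runlist without its queues, reset
      * wavefronts if requested, and finally release the MQDs.
      */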
1532 static int process_termination_cpsch(struct device_queue_manager *dqm,
1533                 struct qcm_process_device *qpd)
1534 {
1535         int retval;
1536         struct queue *q, *next;
1537         struct kernel_queue *kq, *kq_next;
1538         struct mqd_manager *mqd_mgr;
1539         struct device_process_node *cur, *next_dpn;
1540         enum kfd_unmap_queues_filter filter =
1541                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1542
1543         retval = 0;
1544
1545         dqm_lock(dqm);
1546
1547         /* Clean all kernel queues */
1548         list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1549                 list_del(&kq->list);
1550                 dqm->queue_count--;
1551                 qpd->is_debug = false;
1552                 dqm->total_queue_count--;
1553                 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1554         }
1555
1556         /* Clear all user mode queues */
1557         list_for_each_entry(q, &qpd->queues_list, list) {
1558                 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1559                         dqm->sdma_queue_count--;
1560                         deallocate_sdma_queue(dqm, q->sdma_id);
1561                 }
1562
1563                 if (q->properties.is_active)
1564                         dqm->queue_count--;
1565
1566                 dqm->total_queue_count--;
1567         }
1568
1569         /* Unregister process */
1570         list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1571                 if (qpd == cur->qpd) {
1572                         list_del(&cur->list);
1573                         kfree(cur);
1574                         dqm->processes_count--;
1575                         break;
1576                 }
1577         }
1578
1579         retval = execute_queues_cpsch(dqm, filter, 0);
1580         if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1581                 pr_warn("Resetting wavefronts (cpsch) on dev %p\n", dqm->dev);
1582                 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1583                 qpd->reset_wavefronts = false;
1584         }
1585
1586         /* lastly, free mqd resources */
1587         list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1588                 mqd_mgr = dqm->ops.get_mqd_manager(dqm,
1589                         get_mqd_type_from_queue_type(q->properties.type));
1590                 if (!mqd_mgr) {
1591                         retval = -ENOMEM;
1592                         goto out;
1593                 }
1594                 list_del(&q->list);
1595                 qpd->queue_count--;
1596                 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1597         }
1598
1599 out:
1600         dqm_unlock(dqm);
1601         return retval;
1602 }
1603
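     /*
      * Allocate and set up a device queue manager: pick HWS or non-HWS
      * scheduling for the ASIC (from the module parameter, overridden for
      * ASICs where HWS is known not to work), fill in the matching ops
      * table and ASIC-specific asic_ops, then run the policy's initialize
      * hook.
      */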
1604 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1605 {
1606         struct device_queue_manager *dqm;
1607
1608         pr_debug("Loading device queue manager\n");
1609
1610         dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1611         if (!dqm)
1612                 return NULL;
1613
1614         switch (dev->device_info->asic_family) {
1615         /* HWS is not available on Hawaii. */
1616         case CHIP_HAWAII:
1617         /* HWS depends on CWSR for timely dequeue. CWSR is not
1618          * available on Tonga.
1619          *
1620          * FIXME: This argument also applies to Kaveri.
1621          */
1622         case CHIP_TONGA:
1623                 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1624                 break;
1625         default:
1626                 dqm->sched_policy = sched_policy;
1627                 break;
1628         }
1629
1630         dqm->dev = dev;
1631         switch (dqm->sched_policy) {
1632         case KFD_SCHED_POLICY_HWS:
1633         case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1634                 /* initialize dqm for cp scheduling */
1635                 dqm->ops.create_queue = create_queue_cpsch;
1636                 dqm->ops.initialize = initialize_cpsch;
1637                 dqm->ops.start = start_cpsch;
1638                 dqm->ops.stop = stop_cpsch;
1639                 dqm->ops.destroy_queue = destroy_queue_cpsch;
1640                 dqm->ops.update_queue = update_queue;
1641                 dqm->ops.get_mqd_manager = get_mqd_manager;
1642                 dqm->ops.register_process = register_process;
1643                 dqm->ops.unregister_process = unregister_process;
1644                 dqm->ops.uninitialize = uninitialize;
1645                 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1646                 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1647                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1648                 dqm->ops.set_trap_handler = set_trap_handler;
1649                 dqm->ops.process_termination = process_termination_cpsch;
1650                 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1651                 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1652                 break;
1653         case KFD_SCHED_POLICY_NO_HWS:
1654                 /* initialize dqm for no cp scheduling */
1655                 dqm->ops.start = start_nocpsch;
1656                 dqm->ops.stop = stop_nocpsch;
1657                 dqm->ops.create_queue = create_queue_nocpsch;
1658                 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1659                 dqm->ops.update_queue = update_queue;
1660                 dqm->ops.get_mqd_manager = get_mqd_manager;
1661                 dqm->ops.register_process = register_process;
1662                 dqm->ops.unregister_process = unregister_process;
1663                 dqm->ops.initialize = initialize_nocpsch;
1664                 dqm->ops.uninitialize = uninitialize;
1665                 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1666                 dqm->ops.set_trap_handler = set_trap_handler;
1667                 dqm->ops.process_termination = process_termination_nocpsch;
1668                 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1669                 dqm->ops.restore_process_queues =
1670                         restore_process_queues_nocpsch;
1671                 break;
1672         default:
1673                 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1674                 goto out_free;
1675         }
1676
1677         switch (dev->device_info->asic_family) {
1678         case CHIP_CARRIZO:
1679                 device_queue_manager_init_vi(&dqm->asic_ops);
1680                 break;
1681
1682         case CHIP_KAVERI:
1683                 device_queue_manager_init_cik(&dqm->asic_ops);
1684                 break;
1685
1686         case CHIP_HAWAII:
1687                 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1688                 break;
1689
1690         case CHIP_TONGA:
1691         case CHIP_FIJI:
1692         case CHIP_POLARIS10:
1693         case CHIP_POLARIS11:
1694                 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1695                 break;
1696
1697         case CHIP_VEGA10:
1698         case CHIP_RAVEN:
1699                 device_queue_manager_init_v9(&dqm->asic_ops);
1700                 break;
1701         default:
1702                 WARN(1, "Unexpected ASIC family %u",
1703                      dev->device_info->asic_family);
1704                 goto out_free;
1705         }
1706
1707         if (!dqm->ops.initialize(dqm))
1708                 return dqm;
1709
1710 out_free:
1711         kfree(dqm);
1712         return NULL;
1713 }
1714
1715 void device_queue_manager_uninit(struct device_queue_manager *dqm)
1716 {
1717         dqm->ops.uninitialize(dqm);
1718         kfree(dqm);
1719 }
1720
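     /*
      * Called on a GPU VM fault: look up the process by PASID and evict its
      * queues on this device so the faulting process stops submitting work.
      */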
1721 int kfd_process_vm_fault(struct device_queue_manager *dqm,
1722                          unsigned int pasid)
1723 {
1724         struct kfd_process_device *pdd;
1725         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1726         int ret = 0;
1727
1728         if (!p)
1729                 return -EINVAL;
1730         pdd = kfd_get_process_device_data(dqm->dev, p);
1731         if (pdd)
1732                 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1733         kfd_unref_process(p);
1734
1735         return ret;
1736 }
1737
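     /* Deferred handler for HW exceptions: trigger a GPU recovery/reset. */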
1738 static void kfd_process_hw_exception(struct work_struct *work)
1739 {
1740         struct device_queue_manager *dqm = container_of(work,
1741                         struct device_queue_manager, hw_exception_work);
1742         dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
1743 }
1744
1745 #if defined(CONFIG_DEBUG_FS)
1746
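     /*
      * Print a register dump as "offset: value value ..." lines, starting a
      * new line whenever the register offsets stop being contiguous or
      * after 8 values, e.g. (illustrative):
      *   00003040: 00000001 00000000 00000002 ...
      */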
1747 static void seq_reg_dump(struct seq_file *m,
1748                          uint32_t (*dump)[2], uint32_t n_regs)
1749 {
1750         uint32_t i, count;
1751
1752         for (i = 0, count = 0; i < n_regs; i++) {
1753                 if (count == 0 ||
1754                     dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1755                         seq_printf(m, "%s    %08x: %08x",
1756                                    i ? "\n" : "",
1757                                    dump[i][0], dump[i][1]);
1758                         count = 7;
1759                 } else {
1760                         seq_printf(m, " %08x", dump[i][1]);
1761                         count--;
1762                 }
1763         }
1764
1765         seq_puts(m, "\n");
1766 }
1767
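     /*
      * debugfs dump of hardware queue descriptors: the HIQ, every enabled
      * CP pipe/queue on this device, and every SDMA engine/RLC queue.
      */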
1768 int dqm_debugfs_hqds(struct seq_file *m, void *data)
1769 {
1770         struct device_queue_manager *dqm = data;
1771         uint32_t (*dump)[2], n_regs;
1772         int pipe, queue;
1773         int r = 0;
1774
1775         r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1776                 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
1777         if (!r) {
1778                 seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
1779                                 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1780                                 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1781                                 KFD_CIK_HIQ_QUEUE);
1782                 seq_reg_dump(m, dump, n_regs);
1783
1784                 kfree(dump);
1785         }
1786
1787         for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1788                 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1789
1790                 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
1791                         if (!test_bit(pipe_offset + queue,
1792                                       dqm->dev->shared_resources.queue_bitmap))
1793                                 continue;
1794
1795                         r = dqm->dev->kfd2kgd->hqd_dump(
1796                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1797                         if (r)
1798                                 break;
1799
1800                         seq_printf(m, "  CP Pipe %d, Queue %d\n",
1801                                   pipe, queue);
1802                         seq_reg_dump(m, dump, n_regs);
1803
1804                         kfree(dump);
1805                 }
1806         }
1807
1808         for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
1809                 for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
1810                         r = dqm->dev->kfd2kgd->hqd_sdma_dump(
1811                                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1812                         if (r)
1813                                 break;
1814
1815                         seq_printf(m, "  SDMA Engine %d, RLC %d\n",
1816                                   pipe, queue);
1817                         seq_reg_dump(m, dump, n_regs);
1818
1819                         kfree(dump);
1820                 }
1821         }
1822
1823         return r;
1824 }
1825
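     /*
      * debugfs hook to force a runlist re-submission. Setting active_runlist
      * first makes the following execute/unmap pass actually unmap the
      * current runlist before a new one is mapped.
      */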
1826 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
1827 {
1828         int r = 0;
1829
1830         dqm_lock(dqm);
1831         dqm->active_runlist = true;
1832         r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1833         dqm_unlock(dqm);
1834
1835         return r;
1836 }
1837
1838 #endif