]> Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_vcn.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26
27 #include <linux/firmware.h>
28 #include <linux/module.h>
29 #include <drm/drmP.h>
30 #include <drm/drm.h>
31
32 #include "amdgpu.h"
33 #include "amdgpu_pm.h"
34 #include "amdgpu_vcn.h"
35 #include "soc15d.h"
36 #include "soc15_common.h"
37
38 #include "vcn/vcn_1_0_offset.h"
39
40 /* 1 second timeout */
41 #define VCN_IDLE_TIMEOUT        msecs_to_jiffies(1000)
42
43 /* Firmware Names */
44 #define FIRMWARE_RAVEN          "amdgpu/raven_vcn.bin"
45
46 MODULE_FIRMWARE(FIRMWARE_RAVEN);
47
48 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
49
50 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
51 {
52         struct amdgpu_ring *ring;
53         struct drm_sched_rq *rq;
54         unsigned long bo_size;
55         const char *fw_name;
56         const struct common_firmware_header *hdr;
57         unsigned version_major, version_minor, family_id;
58         int r;
59
60         INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
61
62         switch (adev->asic_type) {
63         case CHIP_RAVEN:
64                 fw_name = FIRMWARE_RAVEN;
65                 break;
66         default:
67                 return -EINVAL;
68         }
69
70         r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
71         if (r) {
72                 dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
73                         fw_name);
74                 return r;
75         }
76
77         r = amdgpu_ucode_validate(adev->vcn.fw);
78         if (r) {
79                 dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
80                         fw_name);
81                 release_firmware(adev->vcn.fw);
82                 adev->vcn.fw = NULL;
83                 return r;
84         }
85
86         hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
87         family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
88         version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
89         version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
90         DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
91                 version_major, version_minor, family_id);
92
93
94         bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
95                   +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
96                   +  AMDGPU_VCN_SESSION_SIZE * 40;
97         r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
98                                     AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
99                                     &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
100         if (r) {
101                 dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
102                 return r;
103         }
104
105         ring = &adev->vcn.ring_dec;
106         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
107         r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
108                                   rq, amdgpu_sched_jobs, NULL);
109         if (r != 0) {
110                 DRM_ERROR("Failed setting up VCN dec run queue.\n");
111                 return r;
112         }
113
114         ring = &adev->vcn.ring_enc[0];
115         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
116         r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
117                                   rq, amdgpu_sched_jobs, NULL);
118         if (r != 0) {
119                 DRM_ERROR("Failed setting up VCN enc run queue.\n");
120                 return r;
121         }
122
123         return 0;
124 }
125
126 int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
127 {
128         int i;
129
130         kfree(adev->vcn.saved_bo);
131
132         drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
133
134         drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
135
136         amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
137                               &adev->vcn.gpu_addr,
138                               (void **)&adev->vcn.cpu_addr);
139
140         amdgpu_ring_fini(&adev->vcn.ring_dec);
141
142         for (i = 0; i < adev->vcn.num_enc_rings; ++i)
143                 amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
144
145         release_firmware(adev->vcn.fw);
146
147         return 0;
148 }
149
150 int amdgpu_vcn_suspend(struct amdgpu_device *adev)
151 {
152         unsigned size;
153         void *ptr;
154
155         if (adev->vcn.vcpu_bo == NULL)
156                 return 0;
157
158         cancel_delayed_work_sync(&adev->vcn.idle_work);
159
160         size = amdgpu_bo_size(adev->vcn.vcpu_bo);
161         ptr = adev->vcn.cpu_addr;
162
163         adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
164         if (!adev->vcn.saved_bo)
165                 return -ENOMEM;
166
167         memcpy_fromio(adev->vcn.saved_bo, ptr, size);
168
169         return 0;
170 }
171
172 int amdgpu_vcn_resume(struct amdgpu_device *adev)
173 {
174         unsigned size;
175         void *ptr;
176
177         if (adev->vcn.vcpu_bo == NULL)
178                 return -EINVAL;
179
180         size = amdgpu_bo_size(adev->vcn.vcpu_bo);
181         ptr = adev->vcn.cpu_addr;
182
183         if (adev->vcn.saved_bo != NULL) {
184                 memcpy_toio(ptr, adev->vcn.saved_bo, size);
185                 kfree(adev->vcn.saved_bo);
186                 adev->vcn.saved_bo = NULL;
187         } else {
188                 const struct common_firmware_header *hdr;
189                 unsigned offset;
190
191                 hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
192                 offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
193                 memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
194                             le32_to_cpu(hdr->ucode_size_bytes));
195                 size -= le32_to_cpu(hdr->ucode_size_bytes);
196                 ptr += le32_to_cpu(hdr->ucode_size_bytes);
197                 memset_io(ptr, 0, size);
198         }
199
200         return 0;
201 }
202
203 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
204 {
205         struct amdgpu_device *adev =
206                 container_of(work, struct amdgpu_device, vcn.idle_work.work);
207         unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
208
209         if (fences == 0) {
210                 if (adev->pm.dpm_enabled) {
211                         /* might be used when with pg/cg
212                         amdgpu_dpm_enable_uvd(adev, false);
213                         */
214                 }
215         } else {
216                 schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
217         }
218 }
219
220 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
221 {
222         struct amdgpu_device *adev = ring->adev;
223         bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
224
225         if (set_clocks && adev->pm.dpm_enabled) {
226                 /* might be used when with pg/cg
227                 amdgpu_dpm_enable_uvd(adev, true);
228                 */
229         }
230 }
231
232 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
233 {
234         schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
235 }
236
237 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
238 {
239         struct amdgpu_device *adev = ring->adev;
240         uint32_t tmp = 0;
241         unsigned i;
242         int r;
243
244         WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
245         r = amdgpu_ring_alloc(ring, 3);
246         if (r) {
247                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
248                           ring->idx, r);
249                 return r;
250         }
251         amdgpu_ring_write(ring,
252                 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
253         amdgpu_ring_write(ring, 0xDEADBEEF);
254         amdgpu_ring_commit(ring);
255         for (i = 0; i < adev->usec_timeout; i++) {
256                 tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
257                 if (tmp == 0xDEADBEEF)
258                         break;
259                 DRM_UDELAY(1);
260         }
261
262         if (i < adev->usec_timeout) {
263                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
264                          ring->idx, i);
265         } else {
266                 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
267                           ring->idx, tmp);
268                 r = -EINVAL;
269         }
270         return r;
271 }
272
273 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
274                                    struct amdgpu_bo *bo, bool direct,
275                                    struct dma_fence **fence)
276 {
277         struct amdgpu_device *adev = ring->adev;
278         struct dma_fence *f = NULL;
279         struct amdgpu_job *job;
280         struct amdgpu_ib *ib;
281         uint64_t addr;
282         int i, r;
283
284         r = amdgpu_job_alloc_with_ib(adev, 64, &job);
285         if (r)
286                 goto err;
287
288         ib = &job->ibs[0];
289         addr = amdgpu_bo_gpu_offset(bo);
290         ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
291         ib->ptr[1] = addr;
292         ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
293         ib->ptr[3] = addr >> 32;
294         ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
295         ib->ptr[5] = 0;
296         for (i = 6; i < 16; i += 2) {
297                 ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
298                 ib->ptr[i+1] = 0;
299         }
300         ib->length_dw = 16;
301
302         if (direct) {
303                 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
304                 job->fence = dma_fence_get(f);
305                 if (r)
306                         goto err_free;
307
308                 amdgpu_job_free(job);
309         } else {
310                 r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
311                                       AMDGPU_FENCE_OWNER_UNDEFINED, &f);
312                 if (r)
313                         goto err_free;
314         }
315
316         amdgpu_bo_fence(bo, f, false);
317         amdgpu_bo_unreserve(bo);
318         amdgpu_bo_unref(&bo);
319
320         if (fence)
321                 *fence = dma_fence_get(f);
322         dma_fence_put(f);
323
324         return 0;
325
326 err_free:
327         amdgpu_job_free(job);
328
329 err:
330         amdgpu_bo_unreserve(bo);
331         amdgpu_bo_unref(&bo);
332         return r;
333 }
334
335 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
336                               struct dma_fence **fence)
337 {
338         struct amdgpu_device *adev = ring->adev;
339         struct amdgpu_bo *bo = NULL;
340         uint32_t *msg;
341         int r, i;
342
343         r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
344                                       AMDGPU_GEM_DOMAIN_VRAM,
345                                       &bo, NULL, (void **)&msg);
346         if (r)
347                 return r;
348
349         msg[0] = cpu_to_le32(0x00000028);
350         msg[1] = cpu_to_le32(0x00000038);
351         msg[2] = cpu_to_le32(0x00000001);
352         msg[3] = cpu_to_le32(0x00000000);
353         msg[4] = cpu_to_le32(handle);
354         msg[5] = cpu_to_le32(0x00000000);
355         msg[6] = cpu_to_le32(0x00000001);
356         msg[7] = cpu_to_le32(0x00000028);
357         msg[8] = cpu_to_le32(0x00000010);
358         msg[9] = cpu_to_le32(0x00000000);
359         msg[10] = cpu_to_le32(0x00000007);
360         msg[11] = cpu_to_le32(0x00000000);
361         msg[12] = cpu_to_le32(0x00000780);
362         msg[13] = cpu_to_le32(0x00000440);
363         for (i = 14; i < 1024; ++i)
364                 msg[i] = cpu_to_le32(0x0);
365
366         return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
367 }
368
369 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
370                                bool direct, struct dma_fence **fence)
371 {
372         struct amdgpu_device *adev = ring->adev;
373         struct amdgpu_bo *bo = NULL;
374         uint32_t *msg;
375         int r, i;
376
377         r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
378                                       AMDGPU_GEM_DOMAIN_VRAM,
379                                       &bo, NULL, (void **)&msg);
380         if (r)
381                 return r;
382
383         msg[0] = cpu_to_le32(0x00000028);
384         msg[1] = cpu_to_le32(0x00000018);
385         msg[2] = cpu_to_le32(0x00000000);
386         msg[3] = cpu_to_le32(0x00000002);
387         msg[4] = cpu_to_le32(handle);
388         msg[5] = cpu_to_le32(0x00000000);
389         for (i = 6; i < 1024; ++i)
390                 msg[i] = cpu_to_le32(0x0);
391
392         return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
393 }
394
395 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
396 {
397         struct dma_fence *fence;
398         long r;
399
400         r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
401         if (r) {
402                 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
403                 goto error;
404         }
405
406         r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
407         if (r) {
408                 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
409                 goto error;
410         }
411
412         r = dma_fence_wait_timeout(fence, false, timeout);
413         if (r == 0) {
414                 DRM_ERROR("amdgpu: IB test timed out.\n");
415                 r = -ETIMEDOUT;
416         } else if (r < 0) {
417                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
418         } else {
419                 DRM_DEBUG("ib test on ring %d succeeded\n",  ring->idx);
420                 r = 0;
421         }
422
423         dma_fence_put(fence);
424
425 error:
426         return r;
427 }
428
429 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
430 {
431         struct amdgpu_device *adev = ring->adev;
432         uint32_t rptr = amdgpu_ring_get_rptr(ring);
433         unsigned i;
434         int r;
435
436         r = amdgpu_ring_alloc(ring, 16);
437         if (r) {
438                 DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
439                           ring->idx, r);
440                 return r;
441         }
442         amdgpu_ring_write(ring, VCN_ENC_CMD_END);
443         amdgpu_ring_commit(ring);
444
445         for (i = 0; i < adev->usec_timeout; i++) {
446                 if (amdgpu_ring_get_rptr(ring) != rptr)
447                         break;
448                 DRM_UDELAY(1);
449         }
450
451         if (i < adev->usec_timeout) {
452                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
453                          ring->idx, i);
454         } else {
455                 DRM_ERROR("amdgpu: ring %d test failed\n",
456                           ring->idx);
457                 r = -ETIMEDOUT;
458         }
459
460         return r;
461 }
462
463 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
464                               struct dma_fence **fence)
465 {
466         const unsigned ib_size_dw = 16;
467         struct amdgpu_job *job;
468         struct amdgpu_ib *ib;
469         struct dma_fence *f = NULL;
470         uint64_t dummy;
471         int i, r;
472
473         r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
474         if (r)
475                 return r;
476
477         ib = &job->ibs[0];
478         dummy = ib->gpu_addr + 1024;
479
480         ib->length_dw = 0;
481         ib->ptr[ib->length_dw++] = 0x00000018;
482         ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
483         ib->ptr[ib->length_dw++] = handle;
484         ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
485         ib->ptr[ib->length_dw++] = dummy;
486         ib->ptr[ib->length_dw++] = 0x0000000b;
487
488         ib->ptr[ib->length_dw++] = 0x00000014;
489         ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
490         ib->ptr[ib->length_dw++] = 0x0000001c;
491         ib->ptr[ib->length_dw++] = 0x00000000;
492         ib->ptr[ib->length_dw++] = 0x00000000;
493
494         ib->ptr[ib->length_dw++] = 0x00000008;
495         ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
496
497         for (i = ib->length_dw; i < ib_size_dw; ++i)
498                 ib->ptr[i] = 0x0;
499
500         r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
501         job->fence = dma_fence_get(f);
502         if (r)
503                 goto err;
504
505         amdgpu_job_free(job);
506         if (fence)
507                 *fence = dma_fence_get(f);
508         dma_fence_put(f);
509
510         return 0;
511
512 err:
513         amdgpu_job_free(job);
514         return r;
515 }
516
517 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
518                                 struct dma_fence **fence)
519 {
520         const unsigned ib_size_dw = 16;
521         struct amdgpu_job *job;
522         struct amdgpu_ib *ib;
523         struct dma_fence *f = NULL;
524         uint64_t dummy;
525         int i, r;
526
527         r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
528         if (r)
529                 return r;
530
531         ib = &job->ibs[0];
532         dummy = ib->gpu_addr + 1024;
533
534         ib->length_dw = 0;
535         ib->ptr[ib->length_dw++] = 0x00000018;
536         ib->ptr[ib->length_dw++] = 0x00000001;
537         ib->ptr[ib->length_dw++] = handle;
538         ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
539         ib->ptr[ib->length_dw++] = dummy;
540         ib->ptr[ib->length_dw++] = 0x0000000b;
541
542         ib->ptr[ib->length_dw++] = 0x00000014;
543         ib->ptr[ib->length_dw++] = 0x00000002;
544         ib->ptr[ib->length_dw++] = 0x0000001c;
545         ib->ptr[ib->length_dw++] = 0x00000000;
546         ib->ptr[ib->length_dw++] = 0x00000000;
547
548         ib->ptr[ib->length_dw++] = 0x00000008;
549         ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
550
551         for (i = ib->length_dw; i < ib_size_dw; ++i)
552                 ib->ptr[i] = 0x0;
553
554         r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
555         job->fence = dma_fence_get(f);
556         if (r)
557                 goto err;
558
559         amdgpu_job_free(job);
560         if (fence)
561                 *fence = dma_fence_get(f);
562         dma_fence_put(f);
563
564         return 0;
565
566 err:
567         amdgpu_job_free(job);
568         return r;
569 }
570
571 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
572 {
573         struct dma_fence *fence = NULL;
574         long r;
575
576         r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
577         if (r) {
578                 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
579                 goto error;
580         }
581
582         r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
583         if (r) {
584                 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
585                 goto error;
586         }
587
588         r = dma_fence_wait_timeout(fence, false, timeout);
589         if (r == 0) {
590                 DRM_ERROR("amdgpu: IB test timed out.\n");
591                 r = -ETIMEDOUT;
592         } else if (r < 0) {
593                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
594         } else {
595                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
596                 r = 0;
597         }
598 error:
599         dma_fence_put(fence);
600         return r;
601 }
This page took 0.065633 seconds and 4 git commands to generate.