// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */

#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_gem.h"
#include "pvr_hwrt.h"
#include "pvr_job.h"
#include "pvr_mmu.h"
#include "pvr_power.h"
#include "pvr_rogue_fwif.h"
#include "pvr_rogue_fwif_client.h"
#include "pvr_stream.h"
#include "pvr_stream_defs.h"
#include "pvr_sync.h"

#include <drm/drm_exec.h>
#include <drm/drm_gem.h>
#include <linux/types.h>
#include <uapi/drm/pvr_drm.h>

static void pvr_job_release(struct kref *kref)
{
	struct pvr_job *job = container_of(kref, struct pvr_job, ref_count);

	xa_erase(&job->pvr_dev->job_ids, job->id);

	pvr_hwrt_data_put(job->hwrt);
	pvr_context_put(job->ctx);

	WARN_ON(job->paired_job);

	pvr_queue_job_cleanup(job);
	pvr_job_release_pm_ref(job);

	kfree(job->cmd);
	kfree(job);
}

/**
 * pvr_job_put() - Release reference on job
 * @job: Target job.
 */
void
pvr_job_put(struct pvr_job *job)
{
	if (job)
		kref_put(&job->ref_count, pvr_job_release);
}

/**
 * pvr_job_process_stream() - Build job FW structure from stream
 * @pvr_dev: Device pointer.
 * @cmd_defs: Stream definition.
 * @stream: Pointer to command stream.
 * @stream_size: Size of command stream, in bytes.
 * @job: Pointer to job.
 *
 * Caller is responsible for freeing the output structure.
 *
 * Returns:
 *  * 0 on success,
 *  * -%ENOMEM on out of memory, or
 *  * -%EINVAL on malformed stream.
 */
static int
pvr_job_process_stream(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
		       void *stream, u32 stream_size, struct pvr_job *job)
{
	int err;

	job->cmd = kzalloc(cmd_defs->dest_size, GFP_KERNEL);
	if (!job->cmd)
		return -ENOMEM;

	job->cmd_len = cmd_defs->dest_size;

	err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, job->cmd);
	if (err)
		kfree(job->cmd);

	return err;
}

static int pvr_fw_cmd_init(struct pvr_device *pvr_dev, struct pvr_job *job,
			   const struct pvr_stream_cmd_defs *stream_def,
			   u64 stream_userptr, u32 stream_len)
{
	void *stream;
	int err;

	stream = kzalloc(stream_len, GFP_KERNEL);
	if (!stream)
		return -ENOMEM;

	if (copy_from_user(stream, u64_to_user_ptr(stream_userptr), stream_len)) {
		err = -EFAULT;
		goto err_free_stream;
	}

	err = pvr_job_process_stream(pvr_dev, stream_def, stream, stream_len, job);

err_free_stream:
	kfree(stream);

	return err;
}
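
/*
 * The convert_*_flags() helpers below translate the UAPI submit flags
 * (DRM_PVR_SUBMIT_JOB_*_CMD_*) into the corresponding ROGUE firmware command
 * flags. Unknown bits are rejected earlier by the *_CMD_FLAGS_MASK checks in
 * the per-job-type init functions.
 */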

static u32
convert_geom_flags(u32 in_flags)
{
	u32 out_flags = 0;

	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST)
		out_flags |= ROGUE_GEOM_FLAGS_FIRSTKICK;
	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST)
		out_flags |= ROGUE_GEOM_FLAGS_LASTKICK;
	if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE)
		out_flags |= ROGUE_GEOM_FLAGS_SINGLE_CORE;

	return out_flags;
}

static u32
convert_frag_flags(u32 in_flags)
{
	u32 out_flags = 0;

	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE)
		out_flags |= ROGUE_FRAG_FLAGS_SINGLE_CORE;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER)
		out_flags |= ROGUE_FRAG_FLAGS_DEPTHBUFFER;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER)
		out_flags |= ROGUE_FRAG_FLAGS_STENCILBUFFER;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP)
		out_flags |= ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER)
		out_flags |= ROGUE_FRAG_FLAGS_SCRATCHBUFFER;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS)
		out_flags |= ROGUE_FRAG_FLAGS_GET_VIS_RESULTS;
	if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE)
		out_flags |= ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE;

	return out_flags;
}

static int
pvr_geom_job_fw_cmd_init(struct pvr_job *job,
			 struct drm_pvr_job *args)
{
	struct rogue_fwif_cmd_geom *cmd;
	int err;

	if (args->flags & ~DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK)
		return -EINVAL;

	if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
		return -EINVAL;

	if (!job->hwrt)
		return -EINVAL;

	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_GEOM;
	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_geom_stream,
			      args->cmd_stream, args->cmd_stream_len);
	if (err)
		return err;

	cmd = job->cmd;
	cmd->cmd_shared.cmn.frame_num = 0;
	cmd->flags = convert_geom_flags(args->flags);
	pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
	return 0;
}

static int
pvr_frag_job_fw_cmd_init(struct pvr_job *job,
			 struct drm_pvr_job *args)
{
	struct rogue_fwif_cmd_frag *cmd;
	int err;

	if (args->flags & ~DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK)
		return -EINVAL;

	if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
		return -EINVAL;

	if (!job->hwrt)
		return -EINVAL;

	job->fw_ccb_cmd_type = (args->flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER) ?
			       ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR :
			       ROGUE_FWIF_CCB_CMD_TYPE_FRAG;
	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_frag_stream,
			      args->cmd_stream, args->cmd_stream_len);
	if (err)
		return err;

	cmd = job->cmd;
	cmd->cmd_shared.cmn.frame_num = 0;
	cmd->flags = convert_frag_flags(args->flags);
	pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
	return 0;
}

static u32
convert_compute_flags(u32 in_flags)
{
	u32 out_flags = 0;

	if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP)
		out_flags |= ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
	if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE)
		out_flags |= ROGUE_COMPUTE_FLAG_SINGLE_CORE;

	return out_flags;
}

static int
pvr_compute_job_fw_cmd_init(struct pvr_job *job,
			    struct drm_pvr_job *args)
{
	struct rogue_fwif_cmd_compute *cmd;
	int err;

	if (args->flags & ~DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK)
		return -EINVAL;

	if (job->ctx->type != DRM_PVR_CTX_TYPE_COMPUTE)
		return -EINVAL;

	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_CDM;
	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_compute_stream,
			      args->cmd_stream, args->cmd_stream_len);
	if (err)
		return err;

	cmd = job->cmd;
	cmd->common.frame_num = 0;
	cmd->flags = convert_compute_flags(args->flags);
	return 0;
}

static u32
convert_transfer_flags(u32 in_flags)
{
	u32 out_flags = 0;

	if (in_flags & DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE)
		out_flags |= ROGUE_TRANSFER_FLAGS_SINGLE_CORE;

	return out_flags;
}

static int
pvr_transfer_job_fw_cmd_init(struct pvr_job *job,
			     struct drm_pvr_job *args)
{
	struct rogue_fwif_cmd_transfer *cmd;
	int err;

	if (args->flags & ~DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK)
		return -EINVAL;

	if (job->ctx->type != DRM_PVR_CTX_TYPE_TRANSFER_FRAG)
		return -EINVAL;

	job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D;
	err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_transfer_stream,
			      args->cmd_stream, args->cmd_stream_len);
	if (err)
		return err;

	cmd = job->cmd;
	cmd->common.frame_num = 0;
	cmd->flags = convert_transfer_flags(args->flags);
	return 0;
}

static int
pvr_job_fw_cmd_init(struct pvr_job *job,
		    struct drm_pvr_job *args)
{
	switch (args->type) {
	case DRM_PVR_JOB_TYPE_GEOMETRY:
		return pvr_geom_job_fw_cmd_init(job, args);

	case DRM_PVR_JOB_TYPE_FRAGMENT:
		return pvr_frag_job_fw_cmd_init(job, args);

	case DRM_PVR_JOB_TYPE_COMPUTE:
		return pvr_compute_job_fw_cmd_init(job, args);

	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
		return pvr_transfer_job_fw_cmd_init(job, args);

	default:
		return -EINVAL;
	}
}

/**
 * struct pvr_job_data - Helper container for pairing jobs with the
 * sync_ops supplied for them by the user.
 */
struct pvr_job_data {
	/** @job: Pointer to the job. */
	struct pvr_job *job;

	/** @sync_ops: Pointer to the sync_ops associated with @job. */
	struct drm_pvr_sync_op *sync_ops;

	/** @sync_op_count: Number of members of @sync_ops. */
	u32 sync_op_count;
};

/**
 * prepare_job_syncs() - Prepare all sync objects for a single job.
 * @pvr_file: PowerVR file.
 * @job_data: Precreated job and sync_ops array.
 * @signal_array: xarray to receive signal sync objects.
 *
 * Returns:
 *  * 0 on success, or
 *  * Any error code returned by pvr_sync_signal_array_collect_ops(),
 *    pvr_sync_add_deps_to_job(), drm_sched_job_add_resv_dependencies() or
 *    pvr_sync_signal_array_update_fences().
 */
static int
prepare_job_syncs(struct pvr_file *pvr_file,
		  struct pvr_job_data *job_data,
		  struct xarray *signal_array)
{
	struct dma_fence *done_fence;
	int err = pvr_sync_signal_array_collect_ops(signal_array,
						    from_pvr_file(pvr_file),
						    job_data->sync_op_count,
						    job_data->sync_ops);

	if (err)
		return err;

	err = pvr_sync_add_deps_to_job(pvr_file, &job_data->job->base,
				       job_data->sync_op_count,
				       job_data->sync_ops, signal_array);
	if (err)
		return err;

	if (job_data->job->hwrt) {
		/* The geometry job writes the HWRT region headers, which are
		 * then read by the fragment job.
		 */
		struct drm_gem_object *obj =
			gem_from_pvr_gem(job_data->job->hwrt->fw_obj->gem);
		enum dma_resv_usage usage =
			dma_resv_usage_rw(job_data->job->type ==
					  DRM_PVR_JOB_TYPE_GEOMETRY);

		dma_resv_lock(obj->resv, NULL);
		err = drm_sched_job_add_resv_dependencies(&job_data->job->base,
							  obj->resv, usage);
		dma_resv_unlock(obj->resv);
		if (err)
			return err;
	}

	/* We need to arm the job to get the job done fence. */
	done_fence = pvr_queue_job_arm(job_data->job);

	err = pvr_sync_signal_array_update_fences(signal_array,
						  job_data->sync_op_count,
						  job_data->sync_ops,
						  done_fence);
	return err;
}

/**
 * prepare_job_syncs_for_each() - Prepare all sync objects for an array of jobs.
 * @pvr_file: PowerVR file.
 * @job_data: Array of precreated jobs and their sync_ops.
 * @job_count: Number of jobs.
 * @signal_array: xarray to receive signal sync objects.
 *
 * Returns:
 *  * 0 on success, or
 *  * Any error code returned by prepare_job_syncs().
 */
static int
prepare_job_syncs_for_each(struct pvr_file *pvr_file,
			   struct pvr_job_data *job_data,
			   u32 *job_count,
			   struct xarray *signal_array)
{
	for (u32 i = 0; i < *job_count; i++) {
		int err = prepare_job_syncs(pvr_file, &job_data[i],
					    signal_array);

		if (err)
			return err;
	}

	return 0;
}
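
/* Allocate a job from the userspace args: look up the target context (and,
 * for geometry/fragment jobs, the HWRT data set), copy in and validate the
 * command stream, and initialise the job's queue/scheduler state. On failure
 * the partially initialised job is released via pvr_job_put().
 */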
static struct pvr_job *
create_job(struct pvr_device *pvr_dev,
	   struct pvr_file *pvr_file,
	   struct drm_pvr_job *args)
{
	struct pvr_job *job = NULL;
	int err;

	if (!args->cmd_stream || !args->cmd_stream_len)
		return ERR_PTR(-EINVAL);

	if (args->type != DRM_PVR_JOB_TYPE_GEOMETRY &&
	    args->type != DRM_PVR_JOB_TYPE_FRAGMENT &&
	    (args->hwrt.set_handle || args->hwrt.data_index))
		return ERR_PTR(-EINVAL);

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return ERR_PTR(-ENOMEM);

	kref_init(&job->ref_count);
	job->type = args->type;
	job->pvr_dev = pvr_dev;

	err = xa_alloc(&pvr_dev->job_ids, &job->id, job, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto err_put_job;

	job->ctx = pvr_context_lookup(pvr_file, args->context_handle);
	if (!job->ctx) {
		err = -EINVAL;
		goto err_put_job;
	}

	if (args->hwrt.set_handle) {
		job->hwrt = pvr_hwrt_data_lookup(pvr_file, args->hwrt.set_handle,
						 args->hwrt.data_index);
		if (!job->hwrt) {
			err = -EINVAL;
			goto err_put_job;
		}
	}

	err = pvr_job_fw_cmd_init(job, args);
	if (err)
		goto err_put_job;

	err = pvr_queue_job_init(job);
	if (err)
		goto err_put_job;

	return job;

err_put_job:
	pvr_job_put(job);
	return ERR_PTR(err);
}

/**
 * pvr_job_data_fini() - Cleanup all allocs used to set up job submission.
 * @job_data: Job data array.
 * @job_count: Number of members of @job_data.
 */
static void
pvr_job_data_fini(struct pvr_job_data *job_data, u32 job_count)
{
	for (u32 i = 0; i < job_count; i++) {
		pvr_job_put(job_data[i].job);
		kvfree(job_data[i].sync_ops);
	}
}

/**
 * pvr_job_data_init() - Init an array of created jobs, associating them with
 * the appropriate sync_ops args, which will be copied in.
 * @pvr_dev: Target PowerVR device.
 * @pvr_file: Pointer to PowerVR file structure.
 * @job_args: Job args array copied from user.
 * @job_count: Number of members of @job_args.
 * @job_data_out: Job data array.
 */
static int pvr_job_data_init(struct pvr_device *pvr_dev,
			     struct pvr_file *pvr_file,
			     struct drm_pvr_job *job_args,
			     u32 *job_count,
			     struct pvr_job_data *job_data_out)
{
	int err = 0, i = 0;

	for (; i < *job_count; i++) {
		job_data_out[i].job =
			create_job(pvr_dev, pvr_file, &job_args[i]);
		err = PTR_ERR_OR_ZERO(job_data_out[i].job);

		if (err) {
			*job_count = i;
			job_data_out[i].job = NULL;
			goto err_cleanup;
		}

		err = PVR_UOBJ_GET_ARRAY(job_data_out[i].sync_ops,
					 &job_args[i].sync_ops);
		if (err) {
			*job_count = i;

			/* Ensure the job created above is also cleaned up. */
			i++;
			goto err_cleanup;
		}

		job_data_out[i].sync_op_count = job_args[i].sync_ops.count;
	}

	return 0;

err_cleanup:
	pvr_job_data_fini(job_data_out, i);

	return err;
}
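
/* Push all jobs to their queues. Only called once every fallible step of the
 * submission has succeeded, as pushed jobs cannot be unwound.
 */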
static void
push_jobs(struct pvr_job_data *job_data, u32 job_count)
{
	for (u32 i = 0; i < job_count; i++)
		pvr_queue_job_push(job_data[i].job);
}

static int
prepare_fw_obj_resv(struct drm_exec *exec, struct pvr_fw_object *fw_obj)
{
	return drm_exec_prepare_obj(exec, gem_from_pvr_gem(fw_obj->gem), 1);
}
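
/* Lock the FW objects touched by each job (and reserve a dma-resv fence slot
 * on the HWRT data FW object) under a single drm_exec transaction, so the
 * whole submission is locked atomically with respect to other submitters.
 */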
static int
jobs_lock_all_objs(struct drm_exec *exec, struct pvr_job_data *job_data,
		   u32 job_count)
{
	for (u32 i = 0; i < job_count; i++) {
		struct pvr_job *job = job_data[i].job;

		/* Grab a lock on the context, to guard against
		 * concurrent submission to the same queue.
		 */
		int err = drm_exec_lock_obj(exec,
					    gem_from_pvr_gem(job->ctx->fw_obj->gem));
		if (err)
			return err;

		if (job->hwrt) {
			err = prepare_fw_obj_resv(exec,
						  job->hwrt->fw_obj);
			if (err)
				return err;
		}
	}

	return 0;
}

static int
prepare_job_resvs_for_each(struct drm_exec *exec, struct pvr_job_data *job_data,
			   u32 job_count)
{
	drm_exec_until_all_locked(exec) {
		int err = jobs_lock_all_objs(exec, job_data, job_count);

		drm_exec_retry_on_contention(exec);
		if (err)
			return err;
	}

	return 0;
}

static void
update_job_resvs(struct pvr_job *job)
{
	if (job->hwrt) {
		enum dma_resv_usage usage = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
					    DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ;
		struct drm_gem_object *obj = gem_from_pvr_gem(job->hwrt->fw_obj->gem);

		dma_resv_add_fence(obj->resv, &job->base.s_fence->finished, usage);
	}
}

static void
update_job_resvs_for_each(struct pvr_job_data *job_data, u32 job_count)
{
	for (u32 i = 0; i < job_count; i++)
		update_job_resvs(job_data[i].job);
}
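
/* A geometry job immediately followed by a matching fragment job can be
 * paired: the fragment command is then submitted through the geometry queue
 * together with the geometry command (see pvr_jobs_link_geom_frag()).
 */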
static bool can_combine_jobs(struct pvr_job *a, struct pvr_job *b)
{
	struct pvr_job *geom_job = a, *frag_job = b;
	struct dma_fence *fence;
	unsigned long index;

	/* Geometry and fragment jobs can be combined if they are queued to the
	 * same context and targeting the same HWRT.
	 */
	if (a->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
	    b->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
	    a->ctx != b->ctx ||
	    a->hwrt != b->hwrt)
		return false;

	xa_for_each(&frag_job->base.dependencies, index, fence) {
		/* We combine when we see an explicit geom -> frag dep. */
		if (&geom_job->base.s_fence->scheduled == fence)
			return true;
	}

	return false;
}

static struct dma_fence *
get_last_queued_job_scheduled_fence(struct pvr_queue *queue,
				    struct pvr_job_data *job_data,
				    u32 cur_job_pos)
{
	/* We iterate over the current job array in reverse order to grab the
	 * last to-be-queued job targeting the same queue.
	 */
	for (u32 i = cur_job_pos; i > 0; i--) {
		struct pvr_job *job = job_data[i - 1].job;

		if (job->ctx == queue->ctx && job->type == queue->type)
			return dma_fence_get(&job->base.s_fence->scheduled);
	}

	/* If we didn't find any, we just return the last queued job scheduled
	 * fence attached to the queue.
	 */
	return dma_fence_get(queue->last_queued_job_scheduled_fence);
}
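
/* Walk the job array and pair each geometry job with the fragment job that
 * directly follows it when can_combine_jobs() allows it. Pairing hands the
 * fragment kick (and its KCCB slot) over to the geometry job, so the fragment
 * job's own KCCB fence is dropped.
 */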
static int
pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count)
{
	for (u32 i = 0; i < *job_count - 1; i++) {
		struct pvr_job *geom_job = job_data[i].job;
		struct pvr_job *frag_job = job_data[i + 1].job;
		struct pvr_queue *frag_queue;
		struct dma_fence *f;

		if (!can_combine_jobs(job_data[i].job, job_data[i + 1].job))
			continue;

		/* The fragment job will be submitted by the geometry queue. We
		 * need to make sure it comes after all the other fragment jobs
		 * queued before it.
		 */
		frag_queue = pvr_context_get_queue_for_job(frag_job->ctx,
							   frag_job->type);
		f = get_last_queued_job_scheduled_fence(frag_queue, job_data,
							i);
		if (f) {
			int err = drm_sched_job_add_dependency(&geom_job->base,
							       f);
			if (err)
				return err;
		}

		/* The KCCB slot will be reserved by the geometry job, so we can
		 * drop the KCCB fence on the fragment job.
		 */
		pvr_kccb_fence_put(frag_job->kccb_fence);
		frag_job->kccb_fence = NULL;

		geom_job->paired_job = frag_job;
		frag_job->paired_job = geom_job;

		/* Skip the fragment job we just paired to the geometry job. */
		i++;
	}

	return 0;
}

/**
 * pvr_submit_jobs() - Submit jobs to the GPU
 * @pvr_dev: Target PowerVR device.
 * @pvr_file: Pointer to PowerVR file structure.
 * @args: Ioctl args.
 *
 * This initial implementation is entirely synchronous; on return the GPU will
 * be idle. This will not be the case for future implementations.
 *
 * Returns:
 *  * 0 on success,
 *  * -%EFAULT if arguments can not be copied from user space, or
 *  * -%EINVAL on invalid arguments, or
 *  * Any other error.
 */
int
pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
		struct drm_pvr_ioctl_submit_jobs_args *args)
{
	struct pvr_job_data *job_data = NULL;
	struct drm_pvr_job *job_args;
	struct xarray signal_array;
	u32 jobs_alloced = 0;
	struct drm_exec exec;
	int err;

	if (!args->jobs.count)
		return -EINVAL;

	err = PVR_UOBJ_GET_ARRAY(job_args, &args->jobs);
	if (err)
		return err;

	job_data = kvmalloc_array(args->jobs.count, sizeof(*job_data),
				  GFP_KERNEL | __GFP_ZERO);
	if (!job_data) {
		err = -ENOMEM;
		goto out_free;
	}

	err = pvr_job_data_init(pvr_dev, pvr_file, job_args, &args->jobs.count,
				job_data);
	if (err)
		goto out_free;

	jobs_alloced = args->jobs.count;

	/*
	 * Flush MMU if needed - this has been deferred until now to avoid
	 * overuse of this expensive operation.
	 */
	err = pvr_mmu_flush_exec(pvr_dev, false);
	if (err)
		goto out_job_data_cleanup;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0);

	xa_init_flags(&signal_array, XA_FLAGS_ALLOC);

	err = prepare_job_syncs_for_each(pvr_file, job_data, &args->jobs.count,
					 &signal_array);
	if (err)
		goto out_exec_fini;

	err = prepare_job_resvs_for_each(&exec, job_data, args->jobs.count);
	if (err)
		goto out_exec_fini;

	err = pvr_jobs_link_geom_frag(job_data, &args->jobs.count);
	if (err)
		goto out_exec_fini;

	/* Anything after that point must succeed because we start exposing job
	 * finished fences to the outside world.
	 */
	update_job_resvs_for_each(job_data, args->jobs.count);
	push_jobs(job_data, args->jobs.count);
	pvr_sync_signal_array_push_fences(&signal_array);
	err = 0;

out_exec_fini:
	drm_exec_fini(&exec);
	pvr_sync_signal_array_cleanup(&signal_array);

out_job_data_cleanup:
	pvr_job_data_fini(job_data, jobs_alloced);

out_free:
	kvfree(job_data);
	kvfree(job_args);

	return err;
}
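
/*
 * Note: pvr_submit_jobs() is expected to be called from the driver's
 * submit-jobs ioctl handler (see pvr_drv.c). A rough sketch of such a
 * wrapper, using the standard DRM ioctl signature, might look as follows.
 * The helper names used to recover the device/file wrappers are
 * illustrative only:
 *
 *	static int pvr_ioctl_submit_jobs(struct drm_device *drm_dev,
 *					 void *raw_args, struct drm_file *file)
 *	{
 *		struct drm_pvr_ioctl_submit_jobs_args *args = raw_args;
 *		struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
 *		struct pvr_file *pvr_file = to_pvr_file(file);
 *
 *		return pvr_submit_jobs(pvr_dev, pvr_file, args);
 *	}
 */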