drivers/gpu/drm/v3d/v3d_gem.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /* Copyright (C) 2014-2018 Broadcom */
   3
   4 #include <linux/device.h>
   5 #include <linux/dma-mapping.h>
   6 #include <linux/io.h>
   7 #include <linux/module.h>
   8 #include <linux/platform_device.h>
   9 #include <linux/pm_runtime.h>
  10 #include <linux/reset.h>
  11 #include <linux/sched/signal.h>
  12 #include <linux/uaccess.h>
  13
  14 #include <drm/drm_syncobj.h>
  15 #include <uapi/drm/v3d_drm.h>
  16
  17 #include "v3d_drv.h"
  18 #include "v3d_regs.h"
  19 #include "v3d_trace.h"
  20
  21 static void
  22 v3d_init_core(struct v3d_dev *v3d, int core)
  23 {
  24         /* Set OVRTMUOUT, which means that the texture sampler uniform
  25          * configuration's tmu output type field is used, instead of
  26          * using the hardware default behavior based on the texture
  27          * type.  If you want the default behavior, you can still put
  28          * "2" in the indirect texture state's output_type field.
  29          */
  30         if (v3d->ver < 40)
  31                 V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);
  32
  33         /* Whenever we flush the L2T cache, we always want to flush
  34          * the whole thing.
  35          */
  36         V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
  37         V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
  38 }
  39
  40 /* Sets invariant state for the HW. */
  41 static void
  42 v3d_init_hw_state(struct v3d_dev *v3d)
  43 {
  44         v3d_init_core(v3d, 0);
  45 }
  46
  47 static void
  48 v3d_idle_axi(struct v3d_dev *v3d, int core)
  49 {
  50         V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
  51
  52         if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
  53                       (V3D_GMP_STATUS_RD_COUNT_MASK |
  54                        V3D_GMP_STATUS_WR_COUNT_MASK |
  55                        V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
  56                 DRM_ERROR("Failed to wait for safe GMP shutdown\n");
  57         }
  58 }
  59
  60 static void
  61 v3d_idle_gca(struct v3d_dev *v3d)
  62 {
  63         if (v3d->ver >= 41)
  64                 return;
  65
  66         V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);
  67
  68         if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
  69                       V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
  70                      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
  71                 DRM_ERROR("Failed to wait for safe GCA shutdown\n");
  72         }
  73 }
  74
  75 static void
  76 v3d_reset_by_bridge(struct v3d_dev *v3d)
  77 {
  78         int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);
  79
  80         if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
  81                 V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
  82                                  V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
  83                 V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);
  84
  85                 /* GFXH-1383: The SW_INIT may cause a stray write to address 0
  86                  * of the unit, so reset it to its power-on value here.
  87                  */
  88                 V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
  89         } else {
  90                 WARN_ON_ONCE(V3D_GET_FIELD(version,
  91                                            V3D_TOP_GR_BRIDGE_MAJOR) != 7);
  92                 V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
  93                                  V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
  94                 V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
  95         }
  96 }
  97
  98 static void
  99 v3d_reset_v3d(struct v3d_dev *v3d)
 100 {
 101         if (v3d->reset)
 102                 reset_control_reset(v3d->reset);
 103         else
 104                 v3d_reset_by_bridge(v3d);
 105
 106         v3d_init_hw_state(v3d);
 107 }
 108
 109 void
 110 v3d_reset(struct v3d_dev *v3d)
 111 {
 112         struct drm_device *dev = &v3d->drm;
 113
 114         DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
 115         DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
 116                       V3D_CORE_READ(0, V3D_ERR_STAT));
 117         trace_v3d_reset_begin(dev);
 118
 119         /* XXX: only needed for safe powerdown, not reset. */
 120         if (false)
 121                 v3d_idle_axi(v3d, 0);
 122
 123         v3d_idle_gca(v3d);
 124         v3d_reset_v3d(v3d);
 125
 126         v3d_mmu_set_page_table(v3d);
 127         v3d_irq_reset(v3d);
 128
 129         v3d_perfmon_stop(v3d, v3d->active_perfmon, false);
 130
 131         trace_v3d_reset_end(dev);
 132 }
 133
 134 static void
 135 v3d_flush_l3(struct v3d_dev *v3d)
 136 {
 137         if (v3d->ver < 41) {
 138                 u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);
 139
 140                 V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
 141                               gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);
 142
 143                 if (v3d->ver < 33) {
 144                         V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
 145                                       gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
 146                 }
 147         }
 148 }
 149
 150 /* Invalidates the (read-only) L2C cache.  This was the L2 cache for
 151  * uniforms and instructions on V3D 3.2.
 152  */
 153 static void
 154 v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
 155 {
 156         if (v3d->ver > 32)
 157                 return;
 158
 159         V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
 160                        V3D_L2CACTL_L2CCLR |
 161                        V3D_L2CACTL_L2CENA);
 162 }
 163
 164 /* Invalidates texture L2 cachelines */
 165 static void
 166 v3d_flush_l2t(struct v3d_dev *v3d, int core)
 167 {
 168         /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
 169          * need to wait for completion before dispatching the job --
 170          * L2T accesses will be stalled until the flush has completed.
 171          * However, we do need to make sure we don't try to trigger a
 172          * new flush while the L2_CLEAN queue is trying to
 173          * synchronously clean after a job.
 174          */
 175         mutex_lock(&v3d->cache_clean_lock);
 176         V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 177                        V3D_L2TCACTL_L2TFLS |
 178                        V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
 179         mutex_unlock(&v3d->cache_clean_lock);
 180 }
 181
 182 /* Cleans texture L1 and L2 cachelines (writing back dirty data).
 183  *
 184  * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 185  * executed, we need to make sure that the clean is done before
 186  * signaling job completion.  So, we synchronously wait before
 187  * returning, and we make sure that L2 invalidates don't happen in the
 188  * meantime to confuse our are-we-done checks.
 189  */
 190 void
 191 v3d_clean_caches(struct v3d_dev *v3d)
 192 {
 193         struct drm_device *dev = &v3d->drm;
 194         int core = 0;
 195
 196         trace_v3d_cache_clean_begin(dev);
 197
 198         V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
 199         if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
 200                        V3D_L2TCACTL_L2TFLS), 100)) {
 201                 DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
 202         }
 203
 204         mutex_lock(&v3d->cache_clean_lock);
 205         V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 206                        V3D_L2TCACTL_L2TFLS |
 207                        V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));
 208
 209         if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
 210                        V3D_L2TCACTL_L2TFLS), 100)) {
 211                 DRM_ERROR("Timeout waiting for L2T clean\n");
 212         }
 213
 214         mutex_unlock(&v3d->cache_clean_lock);
 215
 216         trace_v3d_cache_clean_end(dev);
 217 }
 218
 219 /* Invalidates the slice caches.  These are read-only caches. */
 220 static void
 221 v3d_invalidate_slices(struct v3d_dev *v3d, int core)
 222 {
 223         V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
 224                        V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
 225                        V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
 226                        V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
 227                        V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
 228 }
 229
 230 void
 231 v3d_invalidate_caches(struct v3d_dev *v3d)
 232 {
 233         /* Invalidate the caches from the outside in.  That way if
 234          * another CL's concurrent use of nearby memory were to pull
 235          * an invalidated cacheline back in, we wouldn't leave stale
 236          * data in the inner cache.
 237          */
 238         v3d_flush_l3(v3d);
 239         v3d_invalidate_l2c(v3d, 0);
 240         v3d_flush_l2t(v3d, 0);
 241         v3d_invalidate_slices(v3d, 0);
 242 }
 243
 244 /* Takes the reservation lock on all the BOs being referenced, so that
 245  * at queue submit time we can update the reservations.
 246  *
 247  * We don't lock the RCL the tile alloc/state BOs, or overflow memory
 248  * (all of which are on exec->unref_list).  They're entirely private
 249  * to v3d, so we don't attach dma-buf fences to them.
 250  */
 251 static int
 252 v3d_lock_bo_reservations(struct v3d_job *job,
 253                          struct ww_acquire_ctx *acquire_ctx)
 254 {
 255         int i, ret;
 256
 257         ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
 258         if (ret)
 259                 return ret;
 260
 261         for (i = 0; i < job->bo_count; i++) {
 262                 ret = drm_gem_fence_array_add_implicit(&job->deps,
 263                                                        job->bo[i], true);
 264                 if (ret) {
 265                         drm_gem_unlock_reservations(job->bo, job->bo_count,
 266                                                     acquire_ctx);
 267                         return ret;
 268                 }
 269         }
 270
 271         return 0;
 272 }
 273
 274 /**
 275  * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 276  * referenced by the job.
 277  * @dev: DRM device
 278  * @file_priv: DRM file for this fd
 279  * @job: V3D job being set up
 280  * @bo_handles: GEM handles
 281  * @bo_count: Number of GEM handles passed in
 282  *
 283  * The command validator needs to reference BOs by their index within
 284  * the submitted job's BO list.  This does the validation of the job's
 285  * BO list and reference counting for the lifetime of the job.
 286  *
 287  * Note that this function doesn't need to unreference the BOs on
 288  * failure, because that will happen at v3d_exec_cleanup() time.
 289  */
 290 static int
 291 v3d_lookup_bos(struct drm_device *dev,
 292                struct drm_file *file_priv,
 293                struct v3d_job *job,
 294                u64 bo_handles,
 295                u32 bo_count)
 296 {
 297         u32 *handles;
 298         int ret = 0;
 299         int i;
 300
 301         job->bo_count = bo_count;
 302
 303         if (!job->bo_count) {
 304                 /* See comment on bo_index for why we have to check
 305                  * this.
 306                  */
 307                 DRM_DEBUG("Rendering requires BOs\n");
 308                 return -EINVAL;
 309         }
 310
 311         job->bo = kvmalloc_array(job->bo_count,
 312                                  sizeof(struct drm_gem_cma_object *),
 313                                  GFP_KERNEL | __GFP_ZERO);
 314         if (!job->bo) {
 315                 DRM_DEBUG("Failed to allocate validated BO pointers\n");
 316                 return -ENOMEM;
 317         }
 318
 319         handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
 320         if (!handles) {
 321                 ret = -ENOMEM;
 322                 DRM_DEBUG("Failed to allocate incoming GEM handles\n");
 323                 goto fail;
 324         }
 325
 326         if (copy_from_user(handles,
 327                            (void __user *)(uintptr_t)bo_handles,
 328                            job->bo_count * sizeof(u32))) {
 329                 ret = -EFAULT;
 330                 DRM_DEBUG("Failed to copy in GEM handles\n");
 331                 goto fail;
 332         }
 333
 334         spin_lock(&file_priv->table_lock);
 335         for (i = 0; i < job->bo_count; i++) {
 336                 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 337                                                      handles[i]);
 338                 if (!bo) {
 339                         DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
 340                                   i, handles[i]);
 341                         ret = -ENOENT;
 342                         spin_unlock(&file_priv->table_lock);
 343                         goto fail;
 344                 }
 345                 drm_gem_object_get(bo);
 346                 job->bo[i] = bo;
 347         }
 348         spin_unlock(&file_priv->table_lock);
 349
 350 fail:
 351         kvfree(handles);
 352         return ret;
 353 }
 354
 355 static void
 356 v3d_job_free(struct kref *ref)
 357 {
 358         struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
 359         unsigned long index;
 360         struct dma_fence *fence;
 361         int i;
 362
 363         for (i = 0; i < job->bo_count; i++) {
 364                 if (job->bo[i])
 365                         drm_gem_object_put(job->bo[i]);
 366         }
 367         kvfree(job->bo);
 368
 369         xa_for_each(&job->deps, index, fence) {
 370                 dma_fence_put(fence);
 371         }
 372         xa_destroy(&job->deps);
 373
 374         dma_fence_put(job->irq_fence);
 375         dma_fence_put(job->done_fence);
 376
 377         pm_runtime_mark_last_busy(job->v3d->drm.dev);
 378         pm_runtime_put_autosuspend(job->v3d->drm.dev);
 379
 380         if (job->perfmon)
 381                 v3d_perfmon_put(job->perfmon);
 382
 383         kfree(job);
 384 }
 385
 386 static void
 387 v3d_render_job_free(struct kref *ref)
 388 {
 389         struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
 390                                                   base.refcount);
 391         struct v3d_bo *bo, *save;
 392
 393         list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
 394                 drm_gem_object_put(&bo->base.base);
 395         }
 396
 397         v3d_job_free(ref);
 398 }
 399
 400 void v3d_job_put(struct v3d_job *job)
 401 {
 402         kref_put(&job->refcount, job->free);
 403 }
 404
 405 int
 406 v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
 407                   struct drm_file *file_priv)
 408 {
 409         int ret;
 410         struct drm_v3d_wait_bo *args = data;
 411         ktime_t start = ktime_get();
 412         u64 delta_ns;
 413         unsigned long timeout_jiffies =
 414                 nsecs_to_jiffies_timeout(args->timeout_ns);
 415
 416         if (args->pad != 0)
 417                 return -EINVAL;
 418
 419         ret = drm_gem_dma_resv_wait(file_priv, args->handle,
 420                                               true, timeout_jiffies);
 421
 422         /* Decrement the user's timeout, in case we got interrupted
 423          * such that the ioctl will be restarted.
 424          */
 425         delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
 426         if (delta_ns < args->timeout_ns)
 427                 args->timeout_ns -= delta_ns;
 428         else
 429                 args->timeout_ns = 0;
 430
 431         /* Asked to wait beyond the jiffie/scheduler precision? */
 432         if (ret == -ETIME && args->timeout_ns)
 433                 ret = -EAGAIN;
 434
 435         return ret;
 436 }
 437
 438 static int
 439 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 440              struct v3d_job *job, void (*free)(struct kref *ref),
 441              u32 in_sync)
 442 {
 443         struct dma_fence *in_fence = NULL;
 444         int ret;
 445
 446         job->v3d = v3d;
 447         job->free = free;
 448
 449         ret = pm_runtime_get_sync(v3d->drm.dev);
 450         if (ret < 0)
 451                 return ret;
 452
 453         xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
 454
 455         ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence);
 456         if (ret == -EINVAL)
 457                 goto fail;
 458
 459         ret = drm_gem_fence_array_add(&job->deps, in_fence);
 460         if (ret)
 461                 goto fail;
 462
 463         kref_init(&job->refcount);
 464
 465         return 0;
 466 fail:
 467         xa_destroy(&job->deps);
 468         pm_runtime_put_autosuspend(v3d->drm.dev);
 469         return ret;
 470 }
 471
 472 static int
 473 v3d_push_job(struct v3d_file_priv *v3d_priv,
 474              struct v3d_job *job, enum v3d_queue queue)
 475 {
 476         int ret;
 477
 478         ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 479                                  v3d_priv);
 480         if (ret)
 481                 return ret;
 482
 483         job->done_fence = dma_fence_get(&job->base.s_fence->finished);
 484
 485         /* put by scheduler job completion */
 486         kref_get(&job->refcount);
 487
 488         drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
 489
 490         return 0;
 491 }
 492
 493 static void
 494 v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
 495                                          struct v3d_job *job,
 496                                          struct ww_acquire_ctx *acquire_ctx,
 497                                          u32 out_sync,
 498                                          struct dma_fence *done_fence)
 499 {
 500         struct drm_syncobj *sync_out;
 501         int i;
 502
 503         for (i = 0; i < job->bo_count; i++) {
 504                 /* XXX: Use shared fences for read-only objects. */
 505                 dma_resv_add_excl_fence(job->bo[i]->resv,
 506                                                   job->done_fence);
 507         }
 508
 509         drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
 510
 511         /* Update the return sync object for the job */
 512         sync_out = drm_syncobj_find(file_priv, out_sync);
 513         if (sync_out) {
 514                 drm_syncobj_replace_fence(sync_out, done_fence);
 515                 drm_syncobj_put(sync_out);
 516         }
 517 }
 518
 519 /**
 520  * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 521  * @dev: DRM device
 522  * @data: ioctl argument
 523  * @file_priv: DRM file for this fd
 524  *
 525  * This is the main entrypoint for userspace to submit a 3D frame to
 526  * the GPU.  Userspace provides the binner command list (if
 527  * applicable), and the kernel sets up the render command list to draw
 528  * to the framebuffer described in the ioctl, using the command lists
 529  * that the 3D engine's binner will produce.
 530  */
 531 int
 532 v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 533                     struct drm_file *file_priv)
 534 {
 535         struct v3d_dev *v3d = to_v3d_dev(dev);
 536         struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
 537         struct drm_v3d_submit_cl *args = data;
 538         struct v3d_bin_job *bin = NULL;
 539         struct v3d_render_job *render;
 540         struct v3d_job *clean_job = NULL;
 541         struct v3d_job *last_job;
 542         struct ww_acquire_ctx acquire_ctx;
 543         int ret = 0;
 544
 545         trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
 546
 547         if (args->pad != 0)
 548                 return -EINVAL;
 549
 550         if (args->flags != 0 &&
 551             args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
 552                 DRM_INFO("invalid flags: %d\n", args->flags);
 553                 return -EINVAL;
 554         }
 555
 556         render = kcalloc(1, sizeof(*render), GFP_KERNEL);
 557         if (!render)
 558                 return -ENOMEM;
 559
 560         render->start = args->rcl_start;
 561         render->end = args->rcl_end;
 562         INIT_LIST_HEAD(&render->unref_list);
 563
 564         ret = v3d_job_init(v3d, file_priv, &render->base,
 565                            v3d_render_job_free, args->in_sync_rcl);
 566         if (ret) {
 567                 kfree(render);
 568                 return ret;
 569         }
 570
 571         if (args->bcl_start != args->bcl_end) {
 572                 bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
 573                 if (!bin) {
 574                         v3d_job_put(&render->base);
 575                         return -ENOMEM;
 576                 }
 577
 578                 ret = v3d_job_init(v3d, file_priv, &bin->base,
 579                                    v3d_job_free, args->in_sync_bcl);
 580                 if (ret) {
 581                         v3d_job_put(&render->base);
 582                         kfree(bin);
 583                         return ret;
 584                 }
 585
 586                 bin->start = args->bcl_start;
 587                 bin->end = args->bcl_end;
 588                 bin->qma = args->qma;
 589                 bin->qms = args->qms;
 590                 bin->qts = args->qts;
 591                 bin->render = render;
 592         }
 593
 594         if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
 595                 clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
 596                 if (!clean_job) {
 597                         ret = -ENOMEM;
 598                         goto fail;
 599                 }
 600
 601                 ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
 602                 if (ret) {
 603                         kfree(clean_job);
 604                         clean_job = NULL;
 605                         goto fail;
 606                 }
 607
 608                 last_job = clean_job;
 609         } else {
 610                 last_job = &render->base;
 611         }
 612
 613         ret = v3d_lookup_bos(dev, file_priv, last_job,
 614                              args->bo_handles, args->bo_handle_count);
 615         if (ret)
 616                 goto fail;
 617
 618         ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
 619         if (ret)
 620                 goto fail;
 621
 622         if (args->perfmon_id) {
 623                 render->base.perfmon = v3d_perfmon_find(v3d_priv,
 624                                                         args->perfmon_id);
 625
 626                 if (!render->base.perfmon) {
 627                         ret = -ENOENT;
 628                         goto fail;
 629                 }
 630         }
 631
 632         mutex_lock(&v3d->sched_lock);
 633         if (bin) {
 634                 bin->base.perfmon = render->base.perfmon;
 635                 v3d_perfmon_get(bin->base.perfmon);
 636                 ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
 637                 if (ret)
 638                         goto fail_unreserve;
 639
 640                 ret = drm_gem_fence_array_add(&render->base.deps,
 641                                               dma_fence_get(bin->base.done_fence));
 642                 if (ret)
 643                         goto fail_unreserve;
 644         }
 645
 646         ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
 647         if (ret)
 648                 goto fail_unreserve;
 649
 650         if (clean_job) {
 651                 struct dma_fence *render_fence =
 652                         dma_fence_get(render->base.done_fence);
 653                 ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
 654                 if (ret)
 655                         goto fail_unreserve;
 656                 clean_job->perfmon = render->base.perfmon;
 657                 v3d_perfmon_get(clean_job->perfmon);
 658                 ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
 659                 if (ret)
 660                         goto fail_unreserve;
 661         }
 662
 663         mutex_unlock(&v3d->sched_lock);
 664
 665         v3d_attach_fences_and_unlock_reservation(file_priv,
 666                                                  last_job,
 667                                                  &acquire_ctx,
 668                                                  args->out_sync,
 669                                                  last_job->done_fence);
 670
 671         if (bin)
 672                 v3d_job_put(&bin->base);
 673         v3d_job_put(&render->base);
 674         if (clean_job)
 675                 v3d_job_put(clean_job);
 676
 677         return 0;
 678
 679 fail_unreserve:
 680         mutex_unlock(&v3d->sched_lock);
 681         drm_gem_unlock_reservations(last_job->bo,
 682                                     last_job->bo_count, &acquire_ctx);
 683 fail:
 684         if (bin)
 685                 v3d_job_put(&bin->base);
 686         v3d_job_put(&render->base);
 687         if (clean_job)
 688                 v3d_job_put(clean_job);
 689
 690         return ret;
 691 }
 692
 693 /**
 694  * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 695  * @dev: DRM device
 696  * @data: ioctl argument
 697  * @file_priv: DRM file for this fd
 698  *
 699  * Userspace provides the register setup for the TFU, which we don't
 700  * need to validate since the TFU is behind the MMU.
 701  */
 702 int
 703 v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 704                      struct drm_file *file_priv)
 705 {
 706         struct v3d_dev *v3d = to_v3d_dev(dev);
 707         struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
 708         struct drm_v3d_submit_tfu *args = data;
 709         struct v3d_tfu_job *job;
 710         struct ww_acquire_ctx acquire_ctx;
 711         int ret = 0;
 712
 713         trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
 714
 715         job = kcalloc(1, sizeof(*job), GFP_KERNEL);
 716         if (!job)
 717                 return -ENOMEM;
 718
 719         ret = v3d_job_init(v3d, file_priv, &job->base,
 720                            v3d_job_free, args->in_sync);
 721         if (ret) {
 722                 kfree(job);
 723                 return ret;
 724         }
 725
 726         job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
 727                                sizeof(*job->base.bo), GFP_KERNEL);
 728         if (!job->base.bo) {
 729                 v3d_job_put(&job->base);
 730                 return -ENOMEM;
 731         }
 732
 733         job->args = *args;
 734
 735         spin_lock(&file_priv->table_lock);
 736         for (job->base.bo_count = 0;
 737              job->base.bo_count < ARRAY_SIZE(args->bo_handles);
 738              job->base.bo_count++) {
 739                 struct drm_gem_object *bo;
 740
 741                 if (!args->bo_handles[job->base.bo_count])
 742                         break;
 743
 744                 bo = idr_find(&file_priv->object_idr,
 745                               args->bo_handles[job->base.bo_count]);
 746                 if (!bo) {
 747                         DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
 748                                   job->base.bo_count,
 749                                   args->bo_handles[job->base.bo_count]);
 750                         ret = -ENOENT;
 751                         spin_unlock(&file_priv->table_lock);
 752                         goto fail;
 753                 }
 754                 drm_gem_object_get(bo);
 755                 job->base.bo[job->base.bo_count] = bo;
 756         }
 757         spin_unlock(&file_priv->table_lock);
 758
 759         ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
 760         if (ret)
 761                 goto fail;
 762
 763         mutex_lock(&v3d->sched_lock);
 764         ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
 765         if (ret)
 766                 goto fail_unreserve;
 767         mutex_unlock(&v3d->sched_lock);
 768
 769         v3d_attach_fences_and_unlock_reservation(file_priv,
 770                                                  &job->base, &acquire_ctx,
 771                                                  args->out_sync,
 772                                                  job->base.done_fence);
 773
 774         v3d_job_put(&job->base);
 775
 776         return 0;
 777
 778 fail_unreserve:
 779         mutex_unlock(&v3d->sched_lock);
 780         drm_gem_unlock_reservations(job->base.bo, job->base.bo_count,
 781                                     &acquire_ctx);
 782 fail:
 783         v3d_job_put(&job->base);
 784
 785         return ret;
 786 }
 787
 788 /**
 789  * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D.
 790  * @dev: DRM device
 791  * @data: ioctl argument
 792  * @file_priv: DRM file for this fd
 793  *
 794  * Userspace provides the register setup for the CSD, which we don't
 795  * need to validate since the CSD is behind the MMU.
 796  */
 797 int
 798 v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 799                      struct drm_file *file_priv)
 800 {
 801         struct v3d_dev *v3d = to_v3d_dev(dev);
 802         struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
 803         struct drm_v3d_submit_csd *args = data;
 804         struct v3d_csd_job *job;
 805         struct v3d_job *clean_job;
 806         struct ww_acquire_ctx acquire_ctx;
 807         int ret;
 808
 809         trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
 810
 811         if (!v3d_has_csd(v3d)) {
 812                 DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
 813                 return -EINVAL;
 814         }
 815
 816         job = kcalloc(1, sizeof(*job), GFP_KERNEL);
 817         if (!job)
 818                 return -ENOMEM;
 819
 820         ret = v3d_job_init(v3d, file_priv, &job->base,
 821                            v3d_job_free, args->in_sync);
 822         if (ret) {
 823                 kfree(job);
 824                 return ret;
 825         }
 826
 827         clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
 828         if (!clean_job) {
 829                 v3d_job_put(&job->base);
 830                 kfree(job);
 831                 return -ENOMEM;
 832         }
 833
 834         ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
 835         if (ret) {
 836                 v3d_job_put(&job->base);
 837                 kfree(clean_job);
 838                 return ret;
 839         }
 840
 841         job->args = *args;
 842
 843         ret = v3d_lookup_bos(dev, file_priv, clean_job,
 844                              args->bo_handles, args->bo_handle_count);
 845         if (ret)
 846                 goto fail;
 847
 848         ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
 849         if (ret)
 850                 goto fail;
 851
 852         if (args->perfmon_id) {
 853                 job->base.perfmon = v3d_perfmon_find(v3d_priv,
 854                                                      args->perfmon_id);
 855                 if (!job->base.perfmon) {
 856                         ret = -ENOENT;
 857                         goto fail;
 858                 }
 859         }
 860
 861         mutex_lock(&v3d->sched_lock);
 862         ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
 863         if (ret)
 864                 goto fail_unreserve;
 865
 866         ret = drm_gem_fence_array_add(&clean_job->deps,
 867                                       dma_fence_get(job->base.done_fence));
 868         if (ret)
 869                 goto fail_unreserve;
 870
 871         ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
 872         if (ret)
 873                 goto fail_unreserve;
 874         mutex_unlock(&v3d->sched_lock);
 875
 876         v3d_attach_fences_and_unlock_reservation(file_priv,
 877                                                  clean_job,
 878                                                  &acquire_ctx,
 879                                                  args->out_sync,
 880                                                  clean_job->done_fence);
 881
 882         v3d_job_put(&job->base);
 883         v3d_job_put(clean_job);
 884
 885         return 0;
 886
 887 fail_unreserve:
 888         mutex_unlock(&v3d->sched_lock);
 889         drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
 890                                     &acquire_ctx);
 891 fail:
 892         v3d_job_put(&job->base);
 893         v3d_job_put(clean_job);
 894
 895         return ret;
 896 }
 897
 898 int
 899 v3d_gem_init(struct drm_device *dev)
 900 {
 901         struct v3d_dev *v3d = to_v3d_dev(dev);
 902         u32 pt_size = 4096 * 1024;
 903         int ret, i;
 904
 905         for (i = 0; i < V3D_MAX_QUEUES; i++)
 906                 v3d->queue[i].fence_context = dma_fence_context_alloc(1);
 907
 908         spin_lock_init(&v3d->mm_lock);
 909         spin_lock_init(&v3d->job_lock);
 910         mutex_init(&v3d->bo_lock);
 911         mutex_init(&v3d->reset_lock);
 912         mutex_init(&v3d->sched_lock);
 913         mutex_init(&v3d->cache_clean_lock);
 914
 915         /* Note: We don't allocate address 0.  Various bits of HW
 916          * treat 0 as special, such as the occlusion query counters
 917          * where 0 means "disabled".
 918          */
 919         drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
 920
 921         v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
 922                                &v3d->pt_paddr,
 923                                GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 924         if (!v3d->pt) {
 925                 drm_mm_takedown(&v3d->mm);
 926                 dev_err(v3d->drm.dev,
 927                         "Failed to allocate page tables. "
 928                         "Please ensure you have CMA enabled.\n");
 929                 return -ENOMEM;
 930         }
 931
 932         v3d_init_hw_state(v3d);
 933         v3d_mmu_set_page_table(v3d);
 934
 935         ret = v3d_sched_init(v3d);
 936         if (ret) {
 937                 drm_mm_takedown(&v3d->mm);
 938                 dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
 939                                   v3d->pt_paddr);
 940         }
 941
 942         return 0;
 943 }
 944
 945 void
 946 v3d_gem_destroy(struct drm_device *dev)
 947 {
 948         struct v3d_dev *v3d = to_v3d_dev(dev);
 949
 950         v3d_sched_fini(v3d);
 951
 952         /* Waiting for jobs to finish would need to be done before
 953          * unregistering V3D.
 954          */
 955         WARN_ON(v3d->bin_job);
 956         WARN_ON(v3d->render_job);
 957
 958         drm_mm_takedown(&v3d->mm);
 959
 960         dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
 961                           v3d->pt_paddr);
 962 }