drivers/gpu/drm/v3d/v3d_sched.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /* Copyright (C) 2018 Broadcom */
   3
   4 /**
   5  * DOC: Broadcom V3D scheduling
   6  *
   7  * The shared DRM GPU scheduler is used to coordinate submitting jobs
   8  * to the hardware.  Each DRM fd (roughly a client process) gets its
   9  * own scheduler entity, which will process jobs in order.  The GPU
  10  * scheduler will round-robin between clients to submit the next job.
  11  *
  12  * For simplicity, and in order to keep latency low for interactive
  13  * jobs when bulk background jobs are queued up, we submit a new job
  14  * to the HW only when it has completed the last one, instead of
  15  * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
  16  * drm_sched_job_add_dependency() to manage the dependency between bin and
  17  * render, instead of having the clients submit jobs using the HW's
  18  * semaphores to interlock between them.
  19  */
  20
  21 #include <linux/sched/clock.h>
  22 #include <linux/kthread.h>
  23
  24 #include <drm/drm_syncobj.h>
  25
  26 #include "v3d_drv.h"
  27 #include "v3d_regs.h"
  28 #include "v3d_trace.h"
  29
/* Left shift applied to each indirect workgroup count when it is packed
 * into CSD cfg[0], cfg[1] and cfg[2].
 */
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
  31
  32 static struct v3d_job *
  33 to_v3d_job(struct drm_sched_job *sched_job)
  34 {
  35         return container_of(sched_job, struct v3d_job, base);
  36 }
  37
  38 static struct v3d_bin_job *
  39 to_bin_job(struct drm_sched_job *sched_job)
  40 {
  41         return container_of(sched_job, struct v3d_bin_job, base.base);
  42 }
  43
  44 static struct v3d_render_job *
  45 to_render_job(struct drm_sched_job *sched_job)
  46 {
  47         return container_of(sched_job, struct v3d_render_job, base.base);
  48 }
  49
  50 static struct v3d_tfu_job *
  51 to_tfu_job(struct drm_sched_job *sched_job)
  52 {
  53         return container_of(sched_job, struct v3d_tfu_job, base.base);
  54 }
  55
  56 static struct v3d_csd_job *
  57 to_csd_job(struct drm_sched_job *sched_job)
  58 {
  59         return container_of(sched_job, struct v3d_csd_job, base.base);
  60 }
  61
  62 static struct v3d_cpu_job *
  63 to_cpu_job(struct drm_sched_job *sched_job)
  64 {
  65         return container_of(sched_job, struct v3d_cpu_job, base.base);
  66 }
  67
  68 static void
  69 v3d_sched_job_free(struct drm_sched_job *sched_job)
  70 {
  71         struct v3d_job *job = to_v3d_job(sched_job);
  72
  73         v3d_job_cleanup(job);
  74 }
  75
  76 static void
  77 v3d_cpu_job_free(struct drm_sched_job *sched_job)
  78 {
  79         struct v3d_cpu_job *job = to_cpu_job(sched_job);
  80         struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
  81         struct v3d_performance_query_info *performance_query = &job->performance_query;
  82
  83         if (timestamp_query->queries) {
  84                 for (int i = 0; i < timestamp_query->count; i++)
  85                         drm_syncobj_put(timestamp_query->queries[i].syncobj);
  86                 kvfree(timestamp_query->queries);
  87         }
  88
  89         if (performance_query->queries) {
  90                 for (int i = 0; i < performance_query->count; i++)
  91                         drm_syncobj_put(performance_query->queries[i].syncobj);
  92                 kvfree(performance_query->queries);
  93         }
  94
  95         v3d_job_cleanup(&job->base);
  96 }
  97
  98 static void
  99 v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 100 {
 101         if (job->perfmon != v3d->active_perfmon)
 102                 v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
 103
 104         if (job->perfmon && v3d->active_perfmon != job->perfmon)
 105                 v3d_perfmon_start(v3d, job->perfmon);
 106 }
 107
 108 static void
 109 v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
 110 {
 111         struct v3d_dev *v3d = job->v3d;
 112         struct v3d_file_priv *file = job->file->driver_priv;
 113         struct v3d_stats *global_stats = &v3d->queue[queue].stats;
 114         struct v3d_stats *local_stats = &file->stats[queue];
 115         u64 now = local_clock();
 116
 117         write_seqcount_begin(&local_stats->lock);
 118         local_stats->start_ns = now;
 119         write_seqcount_end(&local_stats->lock);
 120
 121         write_seqcount_begin(&global_stats->lock);
 122         global_stats->start_ns = now;
 123         write_seqcount_end(&global_stats->lock);
 124 }
 125
 126 static void
 127 v3d_stats_update(struct v3d_stats *stats, u64 now)
 128 {
 129         write_seqcount_begin(&stats->lock);
 130         stats->enabled_ns += now - stats->start_ns;
 131         stats->jobs_completed++;
 132         stats->start_ns = 0;
 133         write_seqcount_end(&stats->lock);
 134 }
 135
 136 void
 137 v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
 138 {
 139         struct v3d_dev *v3d = job->v3d;
 140         struct v3d_file_priv *file = job->file->driver_priv;
 141         struct v3d_stats *global_stats = &v3d->queue[queue].stats;
 142         struct v3d_stats *local_stats = &file->stats[queue];
 143         u64 now = local_clock();
 144
 145         v3d_stats_update(local_stats, now);
 146         v3d_stats_update(global_stats, now);
 147 }
 148
 149 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 150 {
 151         struct v3d_bin_job *job = to_bin_job(sched_job);
 152         struct v3d_dev *v3d = job->base.v3d;
 153         struct drm_device *dev = &v3d->drm;
 154         struct dma_fence *fence;
 155         unsigned long irqflags;
 156
 157         if (unlikely(job->base.base.s_fence->finished.error))
 158                 return NULL;
 159
 160         /* Lock required around bin_job update vs
 161          * v3d_overflow_mem_work().
 162          */
 163         spin_lock_irqsave(&v3d->job_lock, irqflags);
 164         v3d->bin_job = job;
 165         /* Clear out the overflow allocation, so we don't
 166          * reuse the overflow attached to a previous job.
 167          */
 168         V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
 169         spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 170
 171         v3d_invalidate_caches(v3d);
 172
 173         fence = v3d_fence_create(v3d, V3D_BIN);
 174         if (IS_ERR(fence))
 175                 return NULL;
 176
 177         if (job->base.irq_fence)
 178                 dma_fence_put(job->base.irq_fence);
 179         job->base.irq_fence = dma_fence_get(fence);
 180
 181         trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 182                             job->start, job->end);
 183
 184         v3d_job_start_stats(&job->base, V3D_BIN);
 185         v3d_switch_perfmon(v3d, &job->base);
 186
 187         /* Set the current and end address of the control list.
 188          * Writing the end register is what starts the job.
 189          */
 190         if (job->qma) {
 191                 V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
 192                 V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
 193         }
 194         if (job->qts) {
 195                 V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
 196                                V3D_CLE_CT0QTS_ENABLE |
 197                                job->qts);
 198         }
 199         V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
 200         V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
 201
 202         return fence;
 203 }
 204
 205 static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 206 {
 207         struct v3d_render_job *job = to_render_job(sched_job);
 208         struct v3d_dev *v3d = job->base.v3d;
 209         struct drm_device *dev = &v3d->drm;
 210         struct dma_fence *fence;
 211
 212         if (unlikely(job->base.base.s_fence->finished.error))
 213                 return NULL;
 214
 215         v3d->render_job = job;
 216
 217         /* Can we avoid this flush?  We need to be careful of
 218          * scheduling, though -- imagine job0 rendering to texture and
 219          * job1 reading, and them being executed as bin0, bin1,
 220          * render0, render1, so that render1's flush at bin time
 221          * wasn't enough.
 222          */
 223         v3d_invalidate_caches(v3d);
 224
 225         fence = v3d_fence_create(v3d, V3D_RENDER);
 226         if (IS_ERR(fence))
 227                 return NULL;
 228
 229         if (job->base.irq_fence)
 230                 dma_fence_put(job->base.irq_fence);
 231         job->base.irq_fence = dma_fence_get(fence);
 232
 233         trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 234                             job->start, job->end);
 235
 236         v3d_job_start_stats(&job->base, V3D_RENDER);
 237         v3d_switch_perfmon(v3d, &job->base);
 238
 239         /* XXX: Set the QCFG */
 240
 241         /* Set the current and end address of the control list.
 242          * Writing the end register is what starts the job.
 243          */
 244         V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
 245         V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
 246
 247         return fence;
 248 }
 249
 250 static struct dma_fence *
 251 v3d_tfu_job_run(struct drm_sched_job *sched_job)
 252 {
 253         struct v3d_tfu_job *job = to_tfu_job(sched_job);
 254         struct v3d_dev *v3d = job->base.v3d;
 255         struct drm_device *dev = &v3d->drm;
 256         struct dma_fence *fence;
 257
 258         fence = v3d_fence_create(v3d, V3D_TFU);
 259         if (IS_ERR(fence))
 260                 return NULL;
 261
 262         v3d->tfu_job = job;
 263         if (job->base.irq_fence)
 264                 dma_fence_put(job->base.irq_fence);
 265         job->base.irq_fence = dma_fence_get(fence);
 266
 267         trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 268
 269         v3d_job_start_stats(&job->base, V3D_TFU);
 270
 271         V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
 272         V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
 273         V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica);
 274         V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua);
 275         V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa);
 276         if (v3d->ver >= 71)
 277                 V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc);
 278         V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios);
 279         V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]);
 280         if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) {
 281                 V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]);
 282                 V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]);
 283                 V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]);
 284         }
 285         /* ICFG kicks off the job. */
 286         V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC);
 287
 288         return fence;
 289 }
 290
 291 static struct dma_fence *
 292 v3d_csd_job_run(struct drm_sched_job *sched_job)
 293 {
 294         struct v3d_csd_job *job = to_csd_job(sched_job);
 295         struct v3d_dev *v3d = job->base.v3d;
 296         struct drm_device *dev = &v3d->drm;
 297         struct dma_fence *fence;
 298         int i, csd_cfg0_reg, csd_cfg_reg_count;
 299
 300         v3d->csd_job = job;
 301
 302         v3d_invalidate_caches(v3d);
 303
 304         fence = v3d_fence_create(v3d, V3D_CSD);
 305         if (IS_ERR(fence))
 306                 return NULL;
 307
 308         if (job->base.irq_fence)
 309                 dma_fence_put(job->base.irq_fence);
 310         job->base.irq_fence = dma_fence_get(fence);
 311
 312         trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 313
 314         v3d_job_start_stats(&job->base, V3D_CSD);
 315         v3d_switch_perfmon(v3d, &job->base);
 316
 317         csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
 318         csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7;
 319         for (i = 1; i <= csd_cfg_reg_count; i++)
 320                 V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
 321         /* CFG0 write kicks off the job. */
 322         V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
 323
 324         return fence;
 325 }
 326
 327 static void
 328 v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
 329 {
 330         struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd;
 331         struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 332         struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
 333         struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
 334         u32 *wg_counts;
 335
 336         v3d_get_bo_vaddr(bo);
 337         v3d_get_bo_vaddr(indirect);
 338
 339         wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset);
 340
 341         if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0)
 342                 return;
 343
 344         args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
 345         args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
 346         args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
 347         args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
 348                        (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
 349
 350         for (int i = 0; i < 3; i++) {
 351                 /* 0xffffffff indicates that the uniform rewrite is not needed */
 352                 if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) {
 353                         u32 uniform_idx = indirect_csd->wg_uniform_offsets[i];
 354                         ((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i];
 355                 }
 356         }
 357
 358         v3d_put_bo_vaddr(indirect);
 359         v3d_put_bo_vaddr(bo);
 360 }
 361
 362 static void
 363 v3d_timestamp_query(struct v3d_cpu_job *job)
 364 {
 365         struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
 366         struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 367         u8 *value_addr;
 368
 369         v3d_get_bo_vaddr(bo);
 370
 371         for (int i = 0; i < timestamp_query->count; i++) {
 372                 value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset;
 373                 *((u64 *)value_addr) = i == 0 ? ktime_get_ns() : 0ull;
 374
 375                 drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj,
 376                                           job->base.done_fence);
 377         }
 378
 379         v3d_put_bo_vaddr(bo);
 380 }
 381
 382 static void
 383 v3d_reset_timestamp_queries(struct v3d_cpu_job *job)
 384 {
 385         struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
 386         struct v3d_timestamp_query *queries = timestamp_query->queries;
 387         struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 388         u8 *value_addr;
 389
 390         v3d_get_bo_vaddr(bo);
 391
 392         for (int i = 0; i < timestamp_query->count; i++) {
 393                 value_addr = ((u8 *)bo->vaddr) + queries[i].offset;
 394                 *((u64 *)value_addr) = 0;
 395
 396                 drm_syncobj_replace_fence(queries[i].syncobj, NULL);
 397         }
 398
 399         v3d_put_bo_vaddr(bo);
 400 }
 401
 402 static void
 403 write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value)
 404 {
 405         if (do_64bit) {
 406                 u64 *dst64 = (u64 *)dst;
 407
 408                 dst64[idx] = value;
 409         } else {
 410                 u32 *dst32 = (u32 *)dst;
 411
 412                 dst32[idx] = (u32)value;
 413         }
 414 }
 415
 416 static void
 417 v3d_copy_query_results(struct v3d_cpu_job *job)
 418 {
 419         struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
 420         struct v3d_timestamp_query *queries = timestamp_query->queries;
 421         struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 422         struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]);
 423         struct v3d_copy_query_results_info *copy = &job->copy;
 424         struct dma_fence *fence;
 425         u8 *query_addr;
 426         bool available, write_result;
 427         u8 *data;
 428         int i;
 429
 430         v3d_get_bo_vaddr(bo);
 431         v3d_get_bo_vaddr(timestamp);
 432
 433         data = ((u8 *)bo->vaddr) + copy->offset;
 434
 435         for (i = 0; i < timestamp_query->count; i++) {
 436                 fence = drm_syncobj_fence_get(queries[i].syncobj);
 437                 available = fence ? dma_fence_is_signaled(fence) : false;
 438
 439                 write_result = available || copy->do_partial;
 440                 if (write_result) {
 441                         query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset;
 442                         write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr));
 443                 }
 444
 445                 if (copy->availability_bit)
 446                         write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u);
 447
 448                 data += copy->stride;
 449
 450                 dma_fence_put(fence);
 451         }
 452
 453         v3d_put_bo_vaddr(timestamp);
 454         v3d_put_bo_vaddr(bo);
 455 }
 456
 457 static void
 458 v3d_reset_performance_queries(struct v3d_cpu_job *job)
 459 {
 460         struct v3d_performance_query_info *performance_query = &job->performance_query;
 461         struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
 462         struct v3d_dev *v3d = job->base.v3d;
 463         struct v3d_perfmon *perfmon;
 464
 465         for (int i = 0; i < performance_query->count; i++) {
 466                 for (int j = 0; j < performance_query->nperfmons; j++) {
 467                         perfmon = v3d_perfmon_find(v3d_priv,
 468                                                    performance_query->queries[i].kperfmon_ids[j]);
 469                         if (!perfmon) {
 470                                 DRM_DEBUG("Failed to find perfmon.");
 471                                 continue;
 472                         }
 473
 474                         v3d_perfmon_stop(v3d, perfmon, false);
 475
 476                         memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64));
 477
 478                         v3d_perfmon_put(perfmon);
 479                 }
 480
 481                 drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL);
 482         }
 483 }
 484
 485 static void
 486 v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query)
 487 {
 488         struct v3d_performance_query_info *performance_query = &job->performance_query;
 489         struct v3d_copy_query_results_info *copy = &job->copy;
 490         struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
 491         struct v3d_dev *v3d = job->base.v3d;
 492         struct v3d_perfmon *perfmon;
 493         u64 counter_values[V3D_PERFCNT_NUM];
 494
 495         for (int i = 0; i < performance_query->nperfmons; i++) {
 496                 perfmon = v3d_perfmon_find(v3d_priv,
 497                                            performance_query->queries[query].kperfmon_ids[i]);
 498                 if (!perfmon) {
 499                         DRM_DEBUG("Failed to find perfmon.");
 500                         continue;
 501                 }
 502
 503                 v3d_perfmon_stop(v3d, perfmon, true);
 504
 505                 memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values,
 506                        perfmon->ncounters * sizeof(u64));
 507
 508                 v3d_perfmon_put(perfmon);
 509         }
 510
 511         for (int i = 0; i < performance_query->ncounters; i++)
 512                 write_to_buffer(data, i, copy->do_64bit, counter_values[i]);
 513 }
 514
 515 static void
 516 v3d_copy_performance_query(struct v3d_cpu_job *job)
 517 {
 518         struct v3d_performance_query_info *performance_query = &job->performance_query;
 519         struct v3d_copy_query_results_info *copy = &job->copy;
 520         struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
 521         struct dma_fence *fence;
 522         bool available, write_result;
 523         u8 *data;
 524
 525         v3d_get_bo_vaddr(bo);
 526
 527         data = ((u8 *)bo->vaddr) + copy->offset;
 528
 529         for (int i = 0; i < performance_query->count; i++) {
 530                 fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
 531                 available = fence ? dma_fence_is_signaled(fence) : false;
 532
 533                 write_result = available || copy->do_partial;
 534                 if (write_result)
 535                         v3d_write_performance_query_result(job, data, i);
 536
 537                 if (copy->availability_bit)
 538                         write_to_buffer(data, performance_query->ncounters,
 539                                         copy->do_64bit, available ? 1u : 0u);
 540
 541                 data += copy->stride;
 542
 543                 dma_fence_put(fence);
 544         }
 545
 546         v3d_put_bo_vaddr(bo);
 547 }
 548
 549 static const v3d_cpu_job_fn cpu_job_function[] = {
 550         [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 551         [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 552         [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
 553         [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
 554         [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
 555         [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
 556 };
 557
 558 static struct dma_fence *
 559 v3d_cpu_job_run(struct drm_sched_job *sched_job)
 560 {
 561         struct v3d_cpu_job *job = to_cpu_job(sched_job);
 562         struct v3d_dev *v3d = job->base.v3d;
 563
 564         v3d->cpu_job = job;
 565
 566         if (job->job_type >= ARRAY_SIZE(cpu_job_function)) {
 567                 DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type);
 568                 return NULL;
 569         }
 570
 571         v3d_job_start_stats(&job->base, V3D_CPU);
 572         trace_v3d_cpu_job_begin(&v3d->drm, job->job_type);
 573
 574         cpu_job_function[job->job_type](job);
 575
 576         trace_v3d_cpu_job_end(&v3d->drm, job->job_type);
 577         v3d_job_update_stats(&job->base, V3D_CPU);
 578
 579         return NULL;
 580 }
 581
 582 static struct dma_fence *
 583 v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 584 {
 585         struct v3d_job *job = to_v3d_job(sched_job);
 586         struct v3d_dev *v3d = job->v3d;
 587
 588         v3d_job_start_stats(job, V3D_CACHE_CLEAN);
 589
 590         v3d_clean_caches(v3d);
 591
 592         v3d_job_update_stats(job, V3D_CACHE_CLEAN);
 593
 594         return NULL;
 595 }
 596
 597 static enum drm_gpu_sched_stat
 598 v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 599 {
 600         enum v3d_queue q;
 601
 602         mutex_lock(&v3d->reset_lock);
 603
 604         /* block scheduler */
 605         for (q = 0; q < V3D_MAX_QUEUES; q++)
 606                 drm_sched_stop(&v3d->queue[q].sched, sched_job);
 607
 608         if (sched_job)
 609                 drm_sched_increase_karma(sched_job);
 610
 611         /* get the GPU back into the init state */
 612         v3d_reset(v3d);
 613
 614         for (q = 0; q < V3D_MAX_QUEUES; q++)
 615                 drm_sched_resubmit_jobs(&v3d->queue[q].sched);
 616
 617         /* Unblock schedulers and restart their jobs. */
 618         for (q = 0; q < V3D_MAX_QUEUES; q++) {
 619                 drm_sched_start(&v3d->queue[q].sched, true);
 620         }
 621
 622         mutex_unlock(&v3d->reset_lock);
 623
 624         return DRM_GPU_SCHED_STAT_NOMINAL;
 625 }
 626
 627 /* If the current address or return address have changed, then the GPU
 628  * has probably made progress and we should delay the reset.  This
 629  * could fail if the GPU got in an infinite loop in the CL, but that
 630  * is pretty unlikely outside of an i-g-t testcase.
 631  */
 632 static enum drm_gpu_sched_stat
 633 v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
 634                     u32 *timedout_ctca, u32 *timedout_ctra)
 635 {
 636         struct v3d_job *job = to_v3d_job(sched_job);
 637         struct v3d_dev *v3d = job->v3d;
 638         u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
 639         u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
 640
 641         if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
 642                 *timedout_ctca = ctca;
 643                 *timedout_ctra = ctra;
 644                 return DRM_GPU_SCHED_STAT_NOMINAL;
 645         }
 646
 647         return v3d_gpu_reset_for_timeout(v3d, sched_job);
 648 }
 649
 650 static enum drm_gpu_sched_stat
 651 v3d_bin_job_timedout(struct drm_sched_job *sched_job)
 652 {
 653         struct v3d_bin_job *job = to_bin_job(sched_job);
 654
 655         return v3d_cl_job_timedout(sched_job, V3D_BIN,
 656                                    &job->timedout_ctca, &job->timedout_ctra);
 657 }
 658
 659 static enum drm_gpu_sched_stat
 660 v3d_render_job_timedout(struct drm_sched_job *sched_job)
 661 {
 662         struct v3d_render_job *job = to_render_job(sched_job);
 663
 664         return v3d_cl_job_timedout(sched_job, V3D_RENDER,
 665                                    &job->timedout_ctca, &job->timedout_ctra);
 666 }
 667
 668 static enum drm_gpu_sched_stat
 669 v3d_generic_job_timedout(struct drm_sched_job *sched_job)
 670 {
 671         struct v3d_job *job = to_v3d_job(sched_job);
 672
 673         return v3d_gpu_reset_for_timeout(job->v3d, sched_job);
 674 }
 675
 676 static enum drm_gpu_sched_stat
 677 v3d_csd_job_timedout(struct drm_sched_job *sched_job)
 678 {
 679         struct v3d_csd_job *job = to_csd_job(sched_job);
 680         struct v3d_dev *v3d = job->base.v3d;
 681         u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver));
 682
 683         /* If we've made progress, skip reset and let the timer get
 684          * rearmed.
 685          */
 686         if (job->timedout_batches != batches) {
 687                 job->timedout_batches = batches;
 688                 return DRM_GPU_SCHED_STAT_NOMINAL;
 689         }
 690
 691         return v3d_gpu_reset_for_timeout(v3d, sched_job);
 692 }
 693
 694 static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
 695         .run_job = v3d_bin_job_run,
 696         .timedout_job = v3d_bin_job_timedout,
 697         .free_job = v3d_sched_job_free,
 698 };
 699
 700 static const struct drm_sched_backend_ops v3d_render_sched_ops = {
 701         .run_job = v3d_render_job_run,
 702         .timedout_job = v3d_render_job_timedout,
 703         .free_job = v3d_sched_job_free,
 704 };
 705
 706 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
 707         .run_job = v3d_tfu_job_run,
 708         .timedout_job = v3d_generic_job_timedout,
 709         .free_job = v3d_sched_job_free,
 710 };
 711
 712 static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
 713         .run_job = v3d_csd_job_run,
 714         .timedout_job = v3d_csd_job_timedout,
 715         .free_job = v3d_sched_job_free
 716 };
 717
 718 static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
 719         .run_job = v3d_cache_clean_job_run,
 720         .timedout_job = v3d_generic_job_timedout,
 721         .free_job = v3d_sched_job_free
 722 };
 723
 724 static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
 725         .run_job = v3d_cpu_job_run,
 726         .timedout_job = v3d_generic_job_timedout,
 727         .free_job = v3d_cpu_job_free
 728 };
 729
 730 int
 731 v3d_sched_init(struct v3d_dev *v3d)
 732 {
 733         int hw_jobs_limit = 1;
 734         int job_hang_limit = 0;
 735         int hang_limit_ms = 500;
 736         int ret;
 737
 738         ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
 739                              &v3d_bin_sched_ops, NULL,
 740                              DRM_SCHED_PRIORITY_COUNT,
 741                              hw_jobs_limit, job_hang_limit,
 742                              msecs_to_jiffies(hang_limit_ms), NULL,
 743                              NULL, "v3d_bin", v3d->drm.dev);
 744         if (ret)
 745                 return ret;
 746
 747         ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
 748                              &v3d_render_sched_ops, NULL,
 749                              DRM_SCHED_PRIORITY_COUNT,
 750                              hw_jobs_limit, job_hang_limit,
 751                              msecs_to_jiffies(hang_limit_ms), NULL,
 752                              NULL, "v3d_render", v3d->drm.dev);
 753         if (ret)
 754                 goto fail;
 755
 756         ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
 757                              &v3d_tfu_sched_ops, NULL,
 758                              DRM_SCHED_PRIORITY_COUNT,
 759                              hw_jobs_limit, job_hang_limit,
 760                              msecs_to_jiffies(hang_limit_ms), NULL,
 761                              NULL, "v3d_tfu", v3d->drm.dev);
 762         if (ret)
 763                 goto fail;
 764
 765         if (v3d_has_csd(v3d)) {
 766                 ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
 767                                      &v3d_csd_sched_ops, NULL,
 768                                      DRM_SCHED_PRIORITY_COUNT,
 769                                      hw_jobs_limit, job_hang_limit,
 770                                      msecs_to_jiffies(hang_limit_ms), NULL,
 771                                      NULL, "v3d_csd", v3d->drm.dev);
 772                 if (ret)
 773                         goto fail;
 774
 775                 ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
 776                                      &v3d_cache_clean_sched_ops, NULL,
 777                                      DRM_SCHED_PRIORITY_COUNT,
 778                                      hw_jobs_limit, job_hang_limit,
 779                                      msecs_to_jiffies(hang_limit_ms), NULL,
 780                                      NULL, "v3d_cache_clean", v3d->drm.dev);
 781                 if (ret)
 782                         goto fail;
 783         }
 784
 785         ret = drm_sched_init(&v3d->queue[V3D_CPU].sched,
 786                              &v3d_cpu_sched_ops, NULL,
 787                              DRM_SCHED_PRIORITY_COUNT,
 788                              1, job_hang_limit,
 789                              msecs_to_jiffies(hang_limit_ms), NULL,
 790                              NULL, "v3d_cpu", v3d->drm.dev);
 791         if (ret)
 792                 goto fail;
 793
 794         return 0;
 795
 796 fail:
 797         v3d_sched_fini(v3d);
 798         return ret;
 799 }
 800
 801 void
 802 v3d_sched_fini(struct v3d_dev *v3d)
 803 {
 804         enum v3d_queue q;
 805
 806         for (q = 0; q < V3D_MAX_QUEUES; q++) {
 807                 if (v3d->queue[q].sched.ready)
 808                         drm_sched_fini(&v3d->queue[q].sched);
 809         }
 810 }