drivers/gpu/drm/i915/selftests/i915_request.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/prime_numbers.h>
26 #include <linux/pm_qos.h>
27 #include <linux/sort.h>
28
29 #include "gem/i915_gem_pm.h"
30 #include "gem/selftests/mock_context.h"
31
32 #include "gt/intel_engine_heartbeat.h"
33 #include "gt/intel_engine_pm.h"
34 #include "gt/intel_engine_user.h"
35 #include "gt/intel_gt.h"
36 #include "gt/intel_gt_clock_utils.h"
37 #include "gt/intel_gt_requests.h"
38 #include "gt/selftest_engine_heartbeat.h"
39
40 #include "i915_random.h"
41 #include "i915_selftest.h"
42 #include "igt_flush_test.h"
43 #include "igt_live_test.h"
44 #include "igt_spinner.h"
45 #include "lib_sw_fence.h"
46
47 #include "mock_drm.h"
48 #include "mock_gem_device.h"
49
50 static unsigned int num_uabi_engines(struct drm_i915_private *i915)
51 {
52         struct intel_engine_cs *engine;
53         unsigned int count;
54
55         count = 0;
56         for_each_uabi_engine(engine, i915)
57                 count++;
58
59         return count;
60 }
61
62 static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
63 {
64         return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
65 }
66
67 static int igt_add_request(void *arg)
68 {
69         struct drm_i915_private *i915 = arg;
70         struct i915_request *request;
71
72         /* Basic preliminary test to create a request and let it loose! */
73
74         request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
75         if (!request)
76                 return -ENOMEM;
77
78         i915_request_add(request);
79
80         return 0;
81 }
82
83 static int igt_wait_request(void *arg)
84 {
85         const long T = HZ / 4;
86         struct drm_i915_private *i915 = arg;
87         struct i915_request *request;
88         int err = -EINVAL;
89
90         /* Submit a request, then wait upon it */
91
92         request = mock_request(rcs0(i915)->kernel_context, T);
93         if (!request)
94                 return -ENOMEM;
95
96         i915_request_get(request);
97
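        /*
         * A zero timeout acts as a busy query: it must report -ETIME
         * until the request has actually completed.
         */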
98         if (i915_request_wait(request, 0, 0) != -ETIME) {
99                 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
100                 goto out_request;
101         }
102
103         if (i915_request_wait(request, 0, T) != -ETIME) {
104                 pr_err("request wait succeeded (expected timeout before submit!)\n");
105                 goto out_request;
106         }
107
108         if (i915_request_completed(request)) {
109                 pr_err("request completed before submit!!\n");
110                 goto out_request;
111         }
112
113         i915_request_add(request);
114
115         if (i915_request_wait(request, 0, 0) != -ETIME) {
116                 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
117                 goto out_request;
118         }
119
120         if (i915_request_completed(request)) {
121                 pr_err("request completed immediately!\n");
122                 goto out_request;
123         }
124
125         if (i915_request_wait(request, 0, T / 2) != -ETIME) {
126                 pr_err("request wait succeeded (expected timeout!)\n");
127                 goto out_request;
128         }
129
130         if (i915_request_wait(request, 0, T) == -ETIME) {
131                 pr_err("request wait timed out!\n");
132                 goto out_request;
133         }
134
135         if (!i915_request_completed(request)) {
136                 pr_err("request not complete after waiting!\n");
137                 goto out_request;
138         }
139
140         if (i915_request_wait(request, 0, T) == -ETIME) {
141                 pr_err("request wait timed out when already complete!\n");
142                 goto out_request;
143         }
144
145         err = 0;
146 out_request:
147         i915_request_put(request);
148         mock_device_flush(i915);
149         return err;
150 }
151
152 static int igt_fence_wait(void *arg)
153 {
154         const long T = HZ / 4;
155         struct drm_i915_private *i915 = arg;
156         struct i915_request *request;
157         int err = -EINVAL;
158
159         /* Submit a request, treat it as a fence and wait upon it */
160
161         request = mock_request(rcs0(i915)->kernel_context, T);
162         if (!request)
163                 return -ENOMEM;
164
165         if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
166                 pr_err("fence wait success before submit (expected timeout)!\n");
167                 goto out;
168         }
169
170         i915_request_add(request);
171
172         if (dma_fence_is_signaled(&request->fence)) {
173                 pr_err("fence signaled immediately!\n");
174                 goto out;
175         }
176
177         if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
178                 pr_err("fence wait success after submit (expected timeout)!\n");
179                 goto out;
180         }
181
182         if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
183                 pr_err("fence wait timed out (expected success)!\n");
184                 goto out;
185         }
186
187         if (!dma_fence_is_signaled(&request->fence)) {
188                 pr_err("fence unsignaled after waiting!\n");
189                 goto out;
190         }
191
192         if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
193                 pr_err("fence wait timed out when complete (expected success)!\n");
194                 goto out;
195         }
196
197         err = 0;
198 out:
199         mock_device_flush(i915);
200         return err;
201 }
202
203 static int igt_request_rewind(void *arg)
204 {
205         struct drm_i915_private *i915 = arg;
206         struct i915_request *request, *vip;
207         struct i915_gem_context *ctx[2];
208         struct intel_context *ce;
209         int err = -EINVAL;
210
211         ctx[0] = mock_context(i915, "A");
212         if (!ctx[0]) {
213                 err = -ENOMEM;
214                 goto err_ctx_0;
215         }
216
217         ce = i915_gem_context_get_engine(ctx[0], RCS0);
218         GEM_BUG_ON(IS_ERR(ce));
219         request = mock_request(ce, 2 * HZ);
220         intel_context_put(ce);
221         if (!request) {
222                 err = -ENOMEM;
223                 goto err_context_0;
224         }
225
226         i915_request_get(request);
227         i915_request_add(request);
228
229         ctx[1] = mock_context(i915, "B");
230         if (!ctx[1]) {
231                 err = -ENOMEM;
232                 goto err_ctx_1;
233         }
234
235         ce = i915_gem_context_get_engine(ctx[1], RCS0);
236         GEM_BUG_ON(IS_ERR(ce));
237         vip = mock_request(ce, 0);
238         intel_context_put(ce);
239         if (!vip) {
240                 err = -ENOMEM;
241                 goto err_context_1;
242         }
243
244         /* Simulate preemption by manual reordering */
245         if (!mock_cancel_request(request)) {
246                 pr_err("failed to cancel request (already executed)!\n");
247                 i915_request_add(vip);
248                 goto err_context_1;
249         }
250         i915_request_get(vip);
251         i915_request_add(vip);
252         rcu_read_lock();
253         request->engine->submit_request(request);
254         rcu_read_unlock();
255
256
257         if (i915_request_wait(vip, 0, HZ) == -ETIME) {
258                 pr_err("timed out waiting for high priority request\n");
259                 goto err;
260         }
261
262         if (i915_request_completed(request)) {
263                 pr_err("low priority request already completed\n");
264                 goto err;
265         }
266
267         err = 0;
268 err:
269         i915_request_put(vip);
270 err_context_1:
271         mock_context_close(ctx[1]);
272 err_ctx_1:
273         i915_request_put(request);
274 err_context_0:
275         mock_context_close(ctx[0]);
276 err_ctx_0:
277         mock_device_flush(i915);
278         return err;
279 }
280
281 struct smoketest {
282         struct intel_engine_cs *engine;
283         struct i915_gem_context **contexts;
284         atomic_long_t num_waits, num_fences;
285         int ncontexts, max_batch;
286         struct i915_request *(*request_alloc)(struct intel_context *ce);
287 };
288
289 static struct i915_request *
290 __mock_request_alloc(struct intel_context *ce)
291 {
292         return mock_request(ce, 0);
293 }
294
295 static struct i915_request *
296 __live_request_alloc(struct intel_context *ce)
297 {
298         return intel_context_create_request(ce);
299 }
300
301 static int __igt_breadcrumbs_smoketest(void *arg)
302 {
303         struct smoketest *t = arg;
304         const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
305         const unsigned int total = 4 * t->ncontexts + 1;
306         unsigned int num_waits = 0, num_fences = 0;
307         struct i915_request **requests;
308         I915_RND_STATE(prng);
309         unsigned int *order;
310         int err = 0;
311
312         /*
313          * A very simple test to catch the most egregious of list handling bugs.
314          *
315          * At its heart, we simply create oodles of requests running across
316          * multiple kthreads and enable signaling on them, for the sole purpose
317          * of stressing our breadcrumb handling. The only inspection we do is
318          * that the fences were marked as signaled.
319          */
320
321         requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
322         if (!requests)
323                 return -ENOMEM;
324
325         order = i915_random_order(total, &prng);
326         if (!order) {
327                 err = -ENOMEM;
328                 goto out_requests;
329         }
330
331         while (!kthread_should_stop()) {
332                 struct i915_sw_fence *submit, *wait;
333                 unsigned int n, count;
334
335                 submit = heap_fence_create(GFP_KERNEL);
336                 if (!submit) {
337                         err = -ENOMEM;
338                         break;
339                 }
340
341                 wait = heap_fence_create(GFP_KERNEL);
342                 if (!wait) {
343                         i915_sw_fence_commit(submit);
344                         heap_fence_put(submit);
345                         err = -ENOMEM;
346                         break;
347                 }
348
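                /*
                 * 'submit' holds back submission of every request until all
                 * of them have been constructed; 'wait' signals only once
                 * every request's fence has completed.
                 */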
349                 i915_random_reorder(order, total, &prng);
350                 count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
351
352                 for (n = 0; n < count; n++) {
353                         struct i915_gem_context *ctx =
354                                 t->contexts[order[n] % t->ncontexts];
355                         struct i915_request *rq;
356                         struct intel_context *ce;
357
358                         ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
359                         GEM_BUG_ON(IS_ERR(ce));
360                         rq = t->request_alloc(ce);
361                         intel_context_put(ce);
362                         if (IS_ERR(rq)) {
363                                 err = PTR_ERR(rq);
364                                 count = n;
365                                 break;
366                         }
367
368                         err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
369                                                                submit,
370                                                                GFP_KERNEL);
371
372                         requests[n] = i915_request_get(rq);
373                         i915_request_add(rq);
374
375                         if (err >= 0)
376                                 err = i915_sw_fence_await_dma_fence(wait,
377                                                                     &rq->fence,
378                                                                     0,
379                                                                     GFP_KERNEL);
380
381                         if (err < 0) {
382                                 i915_request_put(rq);
383                                 count = n;
384                                 break;
385                         }
386                 }
387
388                 i915_sw_fence_commit(submit);
389                 i915_sw_fence_commit(wait);
390
391                 if (!wait_event_timeout(wait->wait,
392                                         i915_sw_fence_done(wait),
393                                         5 * HZ)) {
394                         struct i915_request *rq = requests[count - 1];
395
396                         pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
397                                atomic_read(&wait->pending), count,
398                                rq->fence.context, rq->fence.seqno,
399                                t->engine->name);
400                         GEM_TRACE_DUMP();
401
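                        /*
                         * Declare the GT wedged to force-complete the stuck
                         * requests before cleaning up.
                         */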
402                         intel_gt_set_wedged(t->engine->gt);
403                         GEM_BUG_ON(!i915_request_completed(rq));
404                         i915_sw_fence_wait(wait);
405                         err = -EIO;
406                 }
407
408                 for (n = 0; n < count; n++) {
409                         struct i915_request *rq = requests[n];
410
411                         if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
412                                       &rq->fence.flags)) {
413                                 pr_err("%llu:%llu was not signaled!\n",
414                                        rq->fence.context, rq->fence.seqno);
415                                 err = -EINVAL;
416                         }
417
418                         i915_request_put(rq);
419                 }
420
421                 heap_fence_put(wait);
422                 heap_fence_put(submit);
423
424                 if (err < 0)
425                         break;
426
427                 num_fences += count;
428                 num_waits++;
429
430                 cond_resched();
431         }
432
433         atomic_long_add(num_fences, &t->num_fences);
434         atomic_long_add(num_waits, &t->num_waits);
435
436         kfree(order);
437 out_requests:
438         kfree(requests);
439         return err;
440 }
441
442 static int mock_breadcrumbs_smoketest(void *arg)
443 {
444         struct drm_i915_private *i915 = arg;
445         struct smoketest t = {
446                 .engine = rcs0(i915),
447                 .ncontexts = 1024,
448                 .max_batch = 1024,
449                 .request_alloc = __mock_request_alloc
450         };
451         unsigned int ncpus = num_online_cpus();
452         struct task_struct **threads;
453         unsigned int n;
454         int ret = 0;
455
456         /*
457          * Smoketest our breadcrumb/signal handling for requests across multiple
458          * threads. A very simple test to only catch the most egregious of bugs.
459          * See __igt_breadcrumbs_smoketest();
460          */
461
462         threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
463         if (!threads)
464                 return -ENOMEM;
465
466         t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
467         if (!t.contexts) {
468                 ret = -ENOMEM;
469                 goto out_threads;
470         }
471
472         for (n = 0; n < t.ncontexts; n++) {
473                 t.contexts[n] = mock_context(t.engine->i915, "mock");
474                 if (!t.contexts[n]) {
475                         ret = -ENOMEM;
476                         goto out_contexts;
477                 }
478         }
479
480         for (n = 0; n < ncpus; n++) {
481                 threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
482                                          &t, "igt/%d", n);
483                 if (IS_ERR(threads[n])) {
484                         ret = PTR_ERR(threads[n]);
485                         ncpus = n;
486                         break;
487                 }
488
489                 get_task_struct(threads[n]);
490         }
491
492         yield(); /* start all threads before we begin */
493         msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
494
495         for (n = 0; n < ncpus; n++) {
496                 int err;
497
498                 err = kthread_stop(threads[n]);
499                 if (err < 0 && !ret)
500                         ret = err;
501
502                 put_task_struct(threads[n]);
503         }
504         pr_info("Completed %lu waits for %lu fences across %d cpus\n",
505                 atomic_long_read(&t.num_waits),
506                 atomic_long_read(&t.num_fences),
507                 ncpus);
508
509 out_contexts:
510         for (n = 0; n < t.ncontexts; n++) {
511                 if (!t.contexts[n])
512                         break;
513                 mock_context_close(t.contexts[n]);
514         }
515         kfree(t.contexts);
516 out_threads:
517         kfree(threads);
518         return ret;
519 }
520
521 int i915_request_mock_selftests(void)
522 {
523         static const struct i915_subtest tests[] = {
524                 SUBTEST(igt_add_request),
525                 SUBTEST(igt_wait_request),
526                 SUBTEST(igt_fence_wait),
527                 SUBTEST(igt_request_rewind),
528                 SUBTEST(mock_breadcrumbs_smoketest),
529         };
530         struct drm_i915_private *i915;
531         intel_wakeref_t wakeref;
532         int err = 0;
533
534         i915 = mock_gem_device();
535         if (!i915)
536                 return -ENOMEM;
537
538         with_intel_runtime_pm(&i915->runtime_pm, wakeref)
539                 err = i915_subtests(tests, i915);
540
541         mock_destroy_device(i915);
542
543         return err;
544 }
545
546 static int live_nop_request(void *arg)
547 {
548         struct drm_i915_private *i915 = arg;
549         struct intel_engine_cs *engine;
550         struct igt_live_test t;
551         int err = -ENODEV;
552
553         /*
554          * Submit various sized batches of empty requests, to each engine
555          * (individually), and wait for the batch to complete. We can check
556          * the overhead of submitting requests to the hardware.
557          */
558
559         for_each_uabi_engine(engine, i915) {
560                 unsigned long n, prime;
561                 IGT_TIMEOUT(end_time);
562                 ktime_t times[2] = {};
563
564                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
565                 if (err)
566                         return err;
567
568                 intel_engine_pm_get(engine);
569                 for_each_prime_number_from(prime, 1, 8192) {
570                         struct i915_request *request = NULL;
571
572                         times[1] = ktime_get_raw();
573
574                         for (n = 0; n < prime; n++) {
575                                 i915_request_put(request);
576                                 request = i915_request_create(engine->kernel_context);
577                                 if (IS_ERR(request))
578                                         return PTR_ERR(request);
579
580                                 /*
581                                  * This space is left intentionally blank.
582                                  *
583                                  * We do not actually want to perform any
584                                  * action with this request, we just want
585                                  * to measure the latency in allocation
586                                  * and submission of our breadcrumbs -
587                                  * ensuring that the bare request is sufficient
588                                  * for the system to work (i.e. proper HEAD
589                                  * tracking of the rings, interrupt handling,
590                                  * etc). It also gives us the lowest bounds
591                                  * for latency.
592                                  */
593
594                                 i915_request_get(request);
595                                 i915_request_add(request);
596                         }
597                         i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
598                         i915_request_put(request);
599
600                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
601                         if (prime == 1)
602                                 times[0] = times[1];
603
604                         if (__igt_timeout(end_time, NULL))
605                                 break;
606                 }
607                 intel_engine_pm_put(engine);
608
609                 err = igt_live_test_end(&t);
610                 if (err)
611                         return err;
612
613                 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
614                         engine->name,
615                         ktime_to_ns(times[0]),
616                         prime, div64_u64(ktime_to_ns(times[1]), prime));
617         }
618
619         return err;
620 }
621
622 static int __cancel_inactive(struct intel_engine_cs *engine)
623 {
624         struct intel_context *ce;
625         struct igt_spinner spin;
626         struct i915_request *rq;
627         int err = 0;
628
629         if (igt_spinner_init(&spin, engine->gt))
630                 return -ENOMEM;
631
632         ce = intel_context_create(engine);
633         if (IS_ERR(ce)) {
634                 err = PTR_ERR(ce);
635                 goto out_spin;
636         }
637
638         rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
639         if (IS_ERR(rq)) {
640                 err = PTR_ERR(rq);
641                 goto out_ce;
642         }
643
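        /*
         * Cancel before submission (i915_request_add): the request should
         * retire with -EINTR without the spinner ever running.
         */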
644         pr_debug("%s: Cancelling inactive request\n", engine->name);
645         i915_request_cancel(rq, -EINTR);
646         i915_request_get(rq);
647         i915_request_add(rq);
648
649         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
650                 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
651
652                 pr_err("%s: Failed to cancel inactive request\n", engine->name);
653                 intel_engine_dump(engine, &p, "%s\n", engine->name);
654                 err = -ETIME;
655                 goto out_rq;
656         }
657
658         if (rq->fence.error != -EINTR) {
659                 pr_err("%s: fence not cancelled (%d)\n",
660                        engine->name, rq->fence.error);
661                 err = -EINVAL;
662         }
663
664 out_rq:
665         i915_request_put(rq);
666 out_ce:
667         intel_context_put(ce);
668 out_spin:
669         igt_spinner_fini(&spin);
670         if (err)
671                 pr_err("%s: %s error %d\n", __func__, engine->name, err);
672         return err;
673 }
674
675 static int __cancel_active(struct intel_engine_cs *engine)
676 {
677         struct intel_context *ce;
678         struct igt_spinner spin;
679         struct i915_request *rq;
680         int err = 0;
681
682         if (igt_spinner_init(&spin, engine->gt))
683                 return -ENOMEM;
684
685         ce = intel_context_create(engine);
686         if (IS_ERR(ce)) {
687                 err = PTR_ERR(ce);
688                 goto out_spin;
689         }
690
691         rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
692         if (IS_ERR(rq)) {
693                 err = PTR_ERR(rq);
694                 goto out_ce;
695         }
696
697         pr_debug("%s: Cancelling active request\n", engine->name);
698         i915_request_get(rq);
699         i915_request_add(rq);
700         if (!igt_wait_for_spinner(&spin, rq)) {
701                 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
702
703                 pr_err("Failed to start spinner on %s\n", engine->name);
704                 intel_engine_dump(engine, &p, "%s\n", engine->name);
705                 err = -ETIME;
706                 goto out_rq;
707         }
708         i915_request_cancel(rq, -EINTR);
709
710         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
711                 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
712
713                 pr_err("%s: Failed to cancel active request\n", engine->name);
714                 intel_engine_dump(engine, &p, "%s\n", engine->name);
715                 err = -ETIME;
716                 goto out_rq;
717         }
718
719         if (rq->fence.error != -EINTR) {
720                 pr_err("%s: fence not cancelled (%d)\n",
721                        engine->name, rq->fence.error);
722                 err = -EINVAL;
723         }
724
725 out_rq:
726         i915_request_put(rq);
727 out_ce:
728         intel_context_put(ce);
729 out_spin:
730         igt_spinner_fini(&spin);
731         if (err)
732                 pr_err("%s: %s error %d\n", __func__, engine->name, err);
733         return err;
734 }
735
736 static int __cancel_completed(struct intel_engine_cs *engine)
737 {
738         struct intel_context *ce;
739         struct igt_spinner spin;
740         struct i915_request *rq;
741         int err = 0;
742
743         if (igt_spinner_init(&spin, engine->gt))
744                 return -ENOMEM;
745
746         ce = intel_context_create(engine);
747         if (IS_ERR(ce)) {
748                 err = PTR_ERR(ce);
749                 goto out_spin;
750         }
751
752         rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
753         if (IS_ERR(rq)) {
754                 err = PTR_ERR(rq);
755                 goto out_ce;
756         }
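        /*
         * End the spin before submitting so the request completes almost
         * immediately; cancelling an already completed request must not
         * set a fence error.
         */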
757         igt_spinner_end(&spin);
758         i915_request_get(rq);
759         i915_request_add(rq);
760
761         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
762                 err = -ETIME;
763                 goto out_rq;
764         }
765
766         pr_debug("%s: Cancelling completed request\n", engine->name);
767         i915_request_cancel(rq, -EINTR);
768         if (rq->fence.error) {
769                 pr_err("%s: cancelling a completed request set fence error %d\n",
770                        engine->name, rq->fence.error);
771                 err = -EINVAL;
772         }
773
774 out_rq:
775         i915_request_put(rq);
776 out_ce:
777         intel_context_put(ce);
778 out_spin:
779         igt_spinner_fini(&spin);
780         if (err)
781                 pr_err("%s: %s error %d\n", __func__, engine->name, err);
782         return err;
783 }
784
785 static int live_cancel_request(void *arg)
786 {
787         struct drm_i915_private *i915 = arg;
788         struct intel_engine_cs *engine;
789
790         /*
791          * Check cancellation of requests. We expect to be able to immediately
792          * cancel active requests, even if they are currently on the GPU.
793          */
794
795         for_each_uabi_engine(engine, i915) {
796                 struct igt_live_test t;
797                 int err, err2;
798
799                 if (!intel_engine_has_preemption(engine))
800                         continue;
801
802                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
803                 if (err)
804                         return err;
805
806                 err = __cancel_inactive(engine);
807                 if (err == 0)
808                         err = __cancel_active(engine);
809                 if (err == 0)
810                         err = __cancel_completed(engine);
811
812                 err2 = igt_live_test_end(&t);
813                 if (err)
814                         return err;
815                 if (err2)
816                         return err2;
817         }
818
819         return 0;
820 }
821
822 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
823 {
824         struct drm_i915_gem_object *obj;
825         struct i915_vma *vma;
826         u32 *cmd;
827         int err;
828
829         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
830         if (IS_ERR(obj))
831                 return ERR_CAST(obj);
832
833         cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
834         if (IS_ERR(cmd)) {
835                 err = PTR_ERR(cmd);
836                 goto err;
837         }
838
839         *cmd = MI_BATCH_BUFFER_END;
840
841         __i915_gem_object_flush_map(obj, 0, 64);
842         i915_gem_object_unpin_map(obj);
843
844         intel_gt_chipset_flush(to_gt(i915));
845
846         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
847         if (IS_ERR(vma)) {
848                 err = PTR_ERR(vma);
849                 goto err;
850         }
851
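        /*
         * Pin into the GGTT, as the batch is executed from the global GTT
         * via a secure dispatch.
         */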
852         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
853         if (err)
854                 goto err;
855
856         /* Force the wait now to avoid including it in the benchmark */
857         err = i915_vma_sync(vma);
858         if (err)
859                 goto err_pin;
860
861         return vma;
862
863 err_pin:
864         i915_vma_unpin(vma);
865 err:
866         i915_gem_object_put(obj);
867         return ERR_PTR(err);
868 }
869
870 static struct i915_request *
871 empty_request(struct intel_engine_cs *engine,
872               struct i915_vma *batch)
873 {
874         struct i915_request *request;
875         int err;
876
877         request = i915_request_create(engine->kernel_context);
878         if (IS_ERR(request))
879                 return request;
880
881         err = engine->emit_bb_start(request,
882                                     batch->node.start,
883                                     batch->node.size,
884                                     I915_DISPATCH_SECURE);
885         if (err)
886                 goto out_request;
887
888         i915_request_get(request);
889 out_request:
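        /* A constructed request must always be committed, even on error. */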
890         i915_request_add(request);
891         return err ? ERR_PTR(err) : request;
892 }
893
894 static int live_empty_request(void *arg)
895 {
896         struct drm_i915_private *i915 = arg;
897         struct intel_engine_cs *engine;
898         struct igt_live_test t;
899         struct i915_vma *batch;
900         int err = 0;
901
902         /*
903          * Submit various sized batches of empty requests, to each engine
904          * (individually), and wait for the batch to complete. We can check
905          * the overhead of submitting requests to the hardware.
906          */
907
908         batch = empty_batch(i915);
909         if (IS_ERR(batch))
910                 return PTR_ERR(batch);
911
912         for_each_uabi_engine(engine, i915) {
913                 IGT_TIMEOUT(end_time);
914                 struct i915_request *request;
915                 unsigned long n, prime;
916                 ktime_t times[2] = {};
917
918                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
919                 if (err)
920                         goto out_batch;
921
922                 intel_engine_pm_get(engine);
923
924                 /* Warmup / preload */
925                 request = empty_request(engine, batch);
926                 if (IS_ERR(request)) {
927                         err = PTR_ERR(request);
928                         intel_engine_pm_put(engine);
929                         goto out_batch;
930                 }
931                 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
932
933                 for_each_prime_number_from(prime, 1, 8192) {
934                         times[1] = ktime_get_raw();
935
936                         for (n = 0; n < prime; n++) {
937                                 i915_request_put(request);
938                                 request = empty_request(engine, batch);
939                                 if (IS_ERR(request)) {
940                                         err = PTR_ERR(request);
941                                         intel_engine_pm_put(engine);
942                                         goto out_batch;
943                                 }
944                         }
945                         i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
946
947                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
948                         if (prime == 1)
949                                 times[0] = times[1];
950
951                         if (__igt_timeout(end_time, NULL))
952                                 break;
953                 }
954                 i915_request_put(request);
955                 intel_engine_pm_put(engine);
956
957                 err = igt_live_test_end(&t);
958                 if (err)
959                         goto out_batch;
960
961                 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
962                         engine->name,
963                         ktime_to_ns(times[0]),
964                         prime, div64_u64(ktime_to_ns(times[1]), prime));
965         }
966
967 out_batch:
968         i915_vma_unpin(batch);
969         i915_vma_put(batch);
970         return err;
971 }
972
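/*
 * Build a batch whose first instruction branches back to its own start, so
 * it spins on the GPU indefinitely until recursive_batch_resolve() rewrites
 * that first dword to MI_BATCH_BUFFER_END.
 */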
973 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
974 {
975         struct drm_i915_gem_object *obj;
976         const int ver = GRAPHICS_VER(i915);
977         struct i915_vma *vma;
978         u32 *cmd;
979         int err;
980
981         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
982         if (IS_ERR(obj))
983                 return ERR_CAST(obj);
984
985         vma = i915_vma_instance(obj, to_gt(i915)->vm, NULL);
986         if (IS_ERR(vma)) {
987                 err = PTR_ERR(vma);
988                 goto err;
989         }
990
991         err = i915_vma_pin(vma, 0, 0, PIN_USER);
992         if (err)
993                 goto err;
994
995         cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
996         if (IS_ERR(cmd)) {
997                 err = PTR_ERR(cmd);
998                 goto err;
999         }
1000
1001         if (ver >= 8) {
1002                 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
1003                 *cmd++ = lower_32_bits(vma->node.start);
1004                 *cmd++ = upper_32_bits(vma->node.start);
1005         } else if (ver >= 6) {
1006                 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
1007                 *cmd++ = lower_32_bits(vma->node.start);
1008         } else {
1009                 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1010                 *cmd++ = lower_32_bits(vma->node.start);
1011         }
1012         *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
1013
1014         __i915_gem_object_flush_map(obj, 0, 64);
1015         i915_gem_object_unpin_map(obj);
1016
1017         intel_gt_chipset_flush(to_gt(i915));
1018
1019         return vma;
1020
1021 err:
1022         i915_gem_object_put(obj);
1023         return ERR_PTR(err);
1024 }
1025
1026 static int recursive_batch_resolve(struct i915_vma *batch)
1027 {
1028         u32 *cmd;
1029
1030         cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1031         if (IS_ERR(cmd))
1032                 return PTR_ERR(cmd);
1033
1034         *cmd = MI_BATCH_BUFFER_END;
1035
1036         __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
1037         i915_gem_object_unpin_map(batch->obj);
1038
1039         intel_gt_chipset_flush(batch->vm->gt);
1040
1041         return 0;
1042 }
1043
1044 static int live_all_engines(void *arg)
1045 {
1046         struct drm_i915_private *i915 = arg;
1047         const unsigned int nengines = num_uabi_engines(i915);
1048         struct intel_engine_cs *engine;
1049         struct i915_request **request;
1050         struct igt_live_test t;
1051         struct i915_vma *batch;
1052         unsigned int idx;
1053         int err;
1054
1055         /*
1056          * Check we can submit requests to all engines simultaneously. We
1057          * send a recursive batch to each engine - checking that we don't
1058          * block doing so, and that they don't complete too soon.
1059          */
1060
1061         request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1062         if (!request)
1063                 return -ENOMEM;
1064
1065         err = igt_live_test_begin(&t, i915, __func__, "");
1066         if (err)
1067                 goto out_free;
1068
1069         batch = recursive_batch(i915);
1070         if (IS_ERR(batch)) {
1071                 err = PTR_ERR(batch);
1072                 pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
1073                 goto out_free;
1074         }
1075
1076         i915_vma_lock(batch);
1077
1078         idx = 0;
1079         for_each_uabi_engine(engine, i915) {
1080                 request[idx] = intel_engine_create_kernel_request(engine);
1081                 if (IS_ERR(request[idx])) {
1082                         err = PTR_ERR(request[idx]);
1083                         pr_err("%s: Request allocation failed with err=%d\n",
1084                                __func__, err);
1085                         goto out_request;
1086                 }
1087
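                /*
                 * Mark the batch as read by this request so that it is kept
                 * resident until the request completes.
                 */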
1088                 err = i915_request_await_object(request[idx], batch->obj, 0);
1089                 if (err == 0)
1090                         err = i915_vma_move_to_active(batch, request[idx], 0);
1091                 GEM_BUG_ON(err);
1092
1093                 err = engine->emit_bb_start(request[idx],
1094                                             batch->node.start,
1095                                             batch->node.size,
1096                                             0);
1097                 GEM_BUG_ON(err);
1098                 request[idx]->batch = batch;
1099
1100                 i915_request_get(request[idx]);
1101                 i915_request_add(request[idx]);
1102                 idx++;
1103         }
1104
1105         i915_vma_unlock(batch);
1106
1107         idx = 0;
1108         for_each_uabi_engine(engine, i915) {
1109                 if (i915_request_completed(request[idx])) {
1110                         pr_err("%s(%s): request completed too early!\n",
1111                                __func__, engine->name);
1112                         err = -EINVAL;
1113                         goto out_request;
1114                 }
1115                 idx++;
1116         }
1117
1118         err = recursive_batch_resolve(batch);
1119         if (err) {
1120                 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
1121                 goto out_request;
1122         }
1123
1124         idx = 0;
1125         for_each_uabi_engine(engine, i915) {
1126                 long timeout;
1127
1128                 timeout = i915_request_wait(request[idx], 0,
1129                                             MAX_SCHEDULE_TIMEOUT);
1130                 if (timeout < 0) {
1131                         err = timeout;
1132                         pr_err("%s: error waiting for request on %s, err=%d\n",
1133                                __func__, engine->name, err);
1134                         goto out_request;
1135                 }
1136
1137                 GEM_BUG_ON(!i915_request_completed(request[idx]));
1138                 i915_request_put(request[idx]);
1139                 request[idx] = NULL;
1140                 idx++;
1141         }
1142
1143         err = igt_live_test_end(&t);
1144
1145 out_request:
1146         idx = 0;
1147         for_each_uabi_engine(engine, i915) {
1148                 if (request[idx])
1149                         i915_request_put(request[idx]);
1150                 idx++;
1151         }
1152         i915_vma_unpin(batch);
1153         i915_vma_put(batch);
1154 out_free:
1155         kfree(request);
1156         return err;
1157 }
1158
1159 static int live_sequential_engines(void *arg)
1160 {
1161         struct drm_i915_private *i915 = arg;
1162         const unsigned int nengines = num_uabi_engines(i915);
1163         struct i915_request **request;
1164         struct i915_request *prev = NULL;
1165         struct intel_engine_cs *engine;
1166         struct igt_live_test t;
1167         unsigned int idx;
1168         int err;
1169
1170         /*
1171          * Check we can submit requests to all engines sequentially, such
1172          * that each successive request waits for the earlier ones. This
1173          * tests that we don't execute requests out of order, even though
1174          * they are running on independent engines.
1175          */
1176
1177         request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1178         if (!request)
1179                 return -ENOMEM;
1180
1181         err = igt_live_test_begin(&t, i915, __func__, "");
1182         if (err)
1183                 goto out_free;
1184
1185         idx = 0;
1186         for_each_uabi_engine(engine, i915) {
1187                 struct i915_vma *batch;
1188
1189                 batch = recursive_batch(i915);
1190                 if (IS_ERR(batch)) {
1191                         err = PTR_ERR(batch);
1192                         pr_err("%s: Unable to create batch for %s, err=%d\n",
1193                                __func__, engine->name, err);
1194                         goto out_free;
1195                 }
1196
1197                 i915_vma_lock(batch);
1198                 request[idx] = intel_engine_create_kernel_request(engine);
1199                 if (IS_ERR(request[idx])) {
1200                         err = PTR_ERR(request[idx]);
1201                         pr_err("%s: Request allocation failed for %s with err=%d\n",
1202                                __func__, engine->name, err);
1203                         goto out_unlock;
1204                 }
1205
1206                 if (prev) {
1207                         err = i915_request_await_dma_fence(request[idx],
1208                                                            &prev->fence);
1209                         if (err) {
1210                                 i915_request_add(request[idx]);
1211                                 pr_err("%s: Request await failed for %s with err=%d\n",
1212                                        __func__, engine->name, err);
1213                                 goto out_unlock;
1214                         }
1215                 }
1216
1217                 err = i915_request_await_object(request[idx],
1218                                                 batch->obj, false);
1219                 if (err == 0)
1220                         err = i915_vma_move_to_active(batch, request[idx], 0);
1221                 GEM_BUG_ON(err);
1222
1223                 err = engine->emit_bb_start(request[idx],
1224                                             batch->node.start,
1225                                             batch->node.size,
1226                                             0);
1227                 GEM_BUG_ON(err);
1228                 request[idx]->batch = batch;
1229
1230                 i915_request_get(request[idx]);
1231                 i915_request_add(request[idx]);
1232
1233                 prev = request[idx];
1234                 idx++;
1235
1236 out_unlock:
1237                 i915_vma_unlock(batch);
1238                 if (err)
1239                         goto out_request;
1240         }
1241
1242         idx = 0;
1243         for_each_uabi_engine(engine, i915) {
1244                 long timeout;
1245
1246                 if (i915_request_completed(request[idx])) {
1247                         pr_err("%s(%s): request completed too early!\n",
1248                                __func__, engine->name);
1249                         err = -EINVAL;
1250                         goto out_request;
1251                 }
1252
1253                 err = recursive_batch_resolve(request[idx]->batch);
1254                 if (err) {
1255                         pr_err("%s: failed to resolve batch, err=%d\n",
1256                                __func__, err);
1257                         goto out_request;
1258                 }
1259
1260                 timeout = i915_request_wait(request[idx], 0,
1261                                             MAX_SCHEDULE_TIMEOUT);
1262                 if (timeout < 0) {
1263                         err = timeout;
1264                         pr_err("%s: error waiting for request on %s, err=%d\n",
1265                                __func__, engine->name, err);
1266                         goto out_request;
1267                 }
1268
1269                 GEM_BUG_ON(!i915_request_completed(request[idx]));
1270                 idx++;
1271         }
1272
1273         err = igt_live_test_end(&t);
1274
1275 out_request:
1276         idx = 0;
1277         for_each_uabi_engine(engine, i915) {
1278                 u32 *cmd;
1279
1280                 if (!request[idx])
1281                         break;
1282
1283                 cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj,
1284                                                        I915_MAP_WC);
1285                 if (!IS_ERR(cmd)) {
1286                         *cmd = MI_BATCH_BUFFER_END;
1287
1288                         __i915_gem_object_flush_map(request[idx]->batch->obj,
1289                                                     0, sizeof(*cmd));
1290                         i915_gem_object_unpin_map(request[idx]->batch->obj);
1291
1292                         intel_gt_chipset_flush(engine->gt);
1293                 }
1294
1295                 i915_vma_put(request[idx]->batch);
1296                 i915_request_put(request[idx]);
1297                 idx++;
1298         }
1299 out_free:
1300         kfree(request);
1301         return err;
1302 }
1303
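/* Per-engine thread: submit a single request and wait for it, repeatedly. */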
1304 static int __live_parallel_engine1(void *arg)
1305 {
1306         struct intel_engine_cs *engine = arg;
1307         IGT_TIMEOUT(end_time);
1308         unsigned long count;
1309         int err = 0;
1310
1311         count = 0;
1312         intel_engine_pm_get(engine);
1313         do {
1314                 struct i915_request *rq;
1315
1316                 rq = i915_request_create(engine->kernel_context);
1317                 if (IS_ERR(rq)) {
1318                         err = PTR_ERR(rq);
1319                         break;
1320                 }
1321
1322                 i915_request_get(rq);
1323                 i915_request_add(rq);
1324
1325                 err = 0;
1326                 if (i915_request_wait(rq, 0, HZ) < 0)
1327                         err = -ETIME;
1328                 i915_request_put(rq);
1329                 if (err)
1330                         break;
1331
1332                 count++;
1333         } while (!__igt_timeout(end_time, NULL));
1334         intel_engine_pm_put(engine);
1335
1336         pr_info("%s: %lu requests + sync\n", engine->name, count);
1337         return err;
1338 }
1339
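/* Per-engine thread: submit requests back-to-back without waiting. */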
1340 static int __live_parallel_engineN(void *arg)
1341 {
1342         struct intel_engine_cs *engine = arg;
1343         IGT_TIMEOUT(end_time);
1344         unsigned long count;
1345         int err = 0;
1346
1347         count = 0;
1348         intel_engine_pm_get(engine);
1349         do {
1350                 struct i915_request *rq;
1351
1352                 rq = i915_request_create(engine->kernel_context);
1353                 if (IS_ERR(rq)) {
1354                         err = PTR_ERR(rq);
1355                         break;
1356                 }
1357
1358                 i915_request_add(rq);
1359                 count++;
1360         } while (!__igt_timeout(end_time, NULL));
1361         intel_engine_pm_put(engine);
1362
1363         pr_info("%s: %lu requests\n", engine->name, count);
1364         return err;
1365 }
1366
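/*
 * wake_all()/wait_for_all() form a simple barrier on i915->selftest.counter:
 * each parallel thread decrements the counter, and the last one to arrive
 * wakes all the waiters.
 */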
1367 static bool wake_all(struct drm_i915_private *i915)
1368 {
1369         if (atomic_dec_and_test(&i915->selftest.counter)) {
1370                 wake_up_var(&i915->selftest.counter);
1371                 return true;
1372         }
1373
1374         return false;
1375 }
1376
1377 static int wait_for_all(struct drm_i915_private *i915)
1378 {
1379         if (wake_all(i915))
1380                 return 0;
1381
1382         if (wait_var_event_timeout(&i915->selftest.counter,
1383                                    !atomic_read(&i915->selftest.counter),
1384                                    i915_selftest.timeout_jiffies))
1385                 return 0;
1386
1387         return -ETIME;
1388 }
1389
1390 static int __live_parallel_spin(void *arg)
1391 {
1392         struct intel_engine_cs *engine = arg;
1393         struct igt_spinner spin;
1394         struct i915_request *rq;
1395         int err = 0;
1396
1397         /*
1398          * Create a spinner running for eternity on each engine. If a second
1399          * spinner is incorrectly placed on the same engine, it will not be
1400          * able to start in time.
1401          */
1402
1403         if (igt_spinner_init(&spin, engine->gt)) {
1404                 wake_all(engine->i915);
1405                 return -ENOMEM;
1406         }
1407
1408         intel_engine_pm_get(engine);
1409         rq = igt_spinner_create_request(&spin,
1410                                         engine->kernel_context,
1411                                         MI_NOOP); /* no preemption */
1412         intel_engine_pm_put(engine);
1413         if (IS_ERR(rq)) {
1414                 err = PTR_ERR(rq);
1415                 if (err == -ENODEV)
1416                         err = 0;
1417                 wake_all(engine->i915);
1418                 goto out_spin;
1419         }
1420
1421         i915_request_get(rq);
1422         i915_request_add(rq);
1423         if (igt_wait_for_spinner(&spin, rq)) {
1424                 /* Occupy this engine for the whole test */
1425                 err = wait_for_all(engine->i915);
1426         } else {
1427                 pr_err("Failed to start spinner on %s\n", engine->name);
1428                 err = -EINVAL;
1429         }
1430         igt_spinner_end(&spin);
1431
1432         if (err == 0 && i915_request_wait(rq, 0, HZ) < 0)
1433                 err = -EIO;
1434         i915_request_put(rq);
1435
1436 out_spin:
1437         igt_spinner_fini(&spin);
1438         return err;
1439 }
1440
1441 static int live_parallel_engines(void *arg)
1442 {
1443         struct drm_i915_private *i915 = arg;
1444         static int (* const func[])(void *arg) = {
1445                 __live_parallel_engine1,
1446                 __live_parallel_engineN,
1447                 __live_parallel_spin,
1448                 NULL,
1449         };
1450         const unsigned int nengines = num_uabi_engines(i915);
1451         struct intel_engine_cs *engine;
1452         int (* const *fn)(void *arg);
1453         struct task_struct **tsk;
1454         int err = 0;
1455
1456         /*
1457          * Check we can submit requests to all engines concurrently. This
1458          * tests that we load up the system maximally.
1459          */
1460
1461         tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1462         if (!tsk)
1463                 return -ENOMEM;
1464
1465         for (fn = func; !err && *fn; fn++) {
1466                 char name[KSYM_NAME_LEN];
1467                 struct igt_live_test t;
1468                 unsigned int idx;
1469
1470                 snprintf(name, sizeof(name), "%ps", *fn);
1471                 err = igt_live_test_begin(&t, i915, __func__, name);
1472                 if (err)
1473                         break;
1474
1475                 atomic_set(&i915->selftest.counter, nengines);
1476
1477                 idx = 0;
1478                 for_each_uabi_engine(engine, i915) {
1479                         tsk[idx] = kthread_run(*fn, engine,
1480                                                "igt/parallel:%s",
1481                                                engine->name);
1482                         if (IS_ERR(tsk[idx])) {
1483                                 err = PTR_ERR(tsk[idx]);
1484                                 break;
1485                         }
1486                         get_task_struct(tsk[idx++]);
1487                 }
1488
1489                 yield(); /* start all threads before we kthread_stop() */
1490
1491                 idx = 0;
1492                 for_each_uabi_engine(engine, i915) {
1493                         int status;
1494
1495                         if (IS_ERR(tsk[idx]))
1496                                 break;
1497
1498                         status = kthread_stop(tsk[idx]);
1499                         if (status && !err)
1500                                 err = status;
1501
1502                         put_task_struct(tsk[idx++]);
1503                 }
1504
1505                 if (igt_live_test_end(&t))
1506                         err = -EIO;
1507         }
1508
1509         kfree(tsk);
1510         return err;
1511 }
1512
1513 static int
1514 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1515 {
1516         struct i915_request *rq;
1517         int ret;
1518
1519         /*
1520          * Before execlists, all contexts share the same ringbuffer. With
1521          * execlists, each context/engine has a separate ringbuffer and
1522          * for the purposes of this test, inexhaustible.
1523          *
1524          * For the global ringbuffer though, we have to be very careful
1525          * that we do not wrap while preventing the execution of requests
1526          * with an unsignaled fence.
1527          */
1528         if (HAS_EXECLISTS(ctx->i915))
1529                 return INT_MAX;
1530
1531         rq = igt_request_alloc(ctx, engine);
1532         if (IS_ERR(rq)) {
1533                 ret = PTR_ERR(rq);
1534         } else {
1535                 int sz;
1536
1537                 ret = rq->ring->size - rq->reserved_space;
1538                 i915_request_add(rq);
1539
1540                 sz = rq->ring->emit - rq->head;
1541                 if (sz < 0)
1542                         sz += rq->ring->size;
1543                 ret /= sz;
1544                 ret /= 2; /* leave half spare, in case of emergency! */
1545         }
1546
1547         return ret;
1548 }
1549
1550 static int live_breadcrumbs_smoketest(void *arg)
1551 {
1552         struct drm_i915_private *i915 = arg;
1553         const unsigned int nengines = num_uabi_engines(i915);
1554         const unsigned int ncpus = num_online_cpus();
1555         unsigned long num_waits, num_fences;
1556         struct intel_engine_cs *engine;
1557         struct task_struct **threads;
1558         struct igt_live_test live;
1559         intel_wakeref_t wakeref;
1560         struct smoketest *smoke;
1561         unsigned int n, idx;
1562         struct file *file;
1563         int ret = 0;
1564
1565         /*
1566          * Smoketest our breadcrumb/signal handling for requests across multiple
1567          * threads. A very simple test to only catch the most egregious of bugs.
1568          * See __igt_breadcrumbs_smoketest();
1569          *
1570          * On real hardware this time.
1571          */
1572
1573         wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1574
1575         file = mock_file(i915);
1576         if (IS_ERR(file)) {
1577                 ret = PTR_ERR(file);
1578                 goto out_rpm;
1579         }
1580
1581         smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1582         if (!smoke) {
1583                 ret = -ENOMEM;
1584                 goto out_file;
1585         }
1586
1587         threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1588         if (!threads) {
1589                 ret = -ENOMEM;
1590                 goto out_smoke;
1591         }
1592
1593         smoke[0].request_alloc = __live_request_alloc;
1594         smoke[0].ncontexts = 64;
1595         smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1596                                     sizeof(*smoke[0].contexts),
1597                                     GFP_KERNEL);
1598         if (!smoke[0].contexts) {
1599                 ret = -ENOMEM;
1600                 goto out_threads;
1601         }
1602
1603         for (n = 0; n < smoke[0].ncontexts; n++) {
1604                 smoke[0].contexts[n] = live_context(i915, file);
1605                 if (IS_ERR(smoke[0].contexts[n])) {
1606                         ret = PTR_ERR(smoke[0].contexts[n]);
1607                         goto out_contexts;
1608                 }
1609         }
1610
1611         ret = igt_live_test_begin(&live, i915, __func__, "");
1612         if (ret)
1613                 goto out_contexts;
1614
1615         idx = 0;
1616         for_each_uabi_engine(engine, i915) {
1617                 smoke[idx] = smoke[0];
1618                 smoke[idx].engine = engine;
1619                 smoke[idx].max_batch =
1620                         max_batches(smoke[0].contexts[0], engine);
1621                 if (smoke[idx].max_batch < 0) {
1622                         ret = smoke[idx].max_batch;
1623                         goto out_flush;
1624                 }
1625                 /* One ring interleaved between requests from all cpus */
1626                 smoke[idx].max_batch /= num_online_cpus() + 1;
1627                 pr_debug("Limiting batches to %d requests on %s\n",
1628                          smoke[idx].max_batch, engine->name);
1629
1630                 for (n = 0; n < ncpus; n++) {
1631                         struct task_struct *tsk;
1632
1633                         tsk = kthread_run(__igt_breadcrumbs_smoketest,
1634                                           &smoke[idx], "igt/%d.%d", idx, n);
1635                         if (IS_ERR(tsk)) {
1636                                 ret = PTR_ERR(tsk);
1637                                 goto out_flush;
1638                         }
1639
1640                         get_task_struct(tsk);
1641                         threads[idx * ncpus + n] = tsk;
1642                 }
1643
1644                 idx++;
1645         }
1646
1647         yield(); /* start all threads before we begin */
1648         msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1649
1650 out_flush:
1651         idx = 0;
1652         num_waits = 0;
1653         num_fences = 0;
1654         for_each_uabi_engine(engine, i915) {
1655                 for (n = 0; n < ncpus; n++) {
1656                         struct task_struct *tsk = threads[idx * ncpus + n];
1657                         int err;
1658
1659                         if (!tsk)
1660                                 continue;
1661
1662                         err = kthread_stop(tsk);
1663                         if (err < 0 && !ret)
1664                                 ret = err;
1665
1666                         put_task_struct(tsk);
1667                 }
1668
1669                 num_waits += atomic_long_read(&smoke[idx].num_waits);
1670                 num_fences += atomic_long_read(&smoke[idx].num_fences);
1671                 idx++;
1672         }
1673         pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1674                 num_waits, num_fences, idx, ncpus);
1675
1676         ret = igt_live_test_end(&live) ?: ret;
1677 out_contexts:
1678         kfree(smoke[0].contexts);
1679 out_threads:
1680         kfree(threads);
1681 out_smoke:
1682         kfree(smoke);
1683 out_file:
1684         fput(file);
1685 out_rpm:
1686         intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1687
1688         return ret;
1689 }
1690
1691 int i915_request_live_selftests(struct drm_i915_private *i915)
1692 {
1693         static const struct i915_subtest tests[] = {
1694                 SUBTEST(live_nop_request),
1695                 SUBTEST(live_all_engines),
1696                 SUBTEST(live_sequential_engines),
1697                 SUBTEST(live_parallel_engines),
1698                 SUBTEST(live_empty_request),
1699                 SUBTEST(live_cancel_request),
1700                 SUBTEST(live_breadcrumbs_smoketest),
1701         };
1702
1703         if (intel_gt_is_wedged(to_gt(i915)))
1704                 return 0;
1705
1706         return i915_subtests(tests, i915);
1707 }
1708
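/*
 * Synchronise with a context: submit a kernel-context request that awaits
 * the context's last request, wait for it to complete, then flush
 * submission until the engine idles. Used before sampling per-context
 * runtimes in the perf tests below.
 */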
1709 static int switch_to_kernel_sync(struct intel_context *ce, int err)
1710 {
1711         struct i915_request *rq;
1712         struct dma_fence *fence;
1713
1714         rq = intel_engine_create_kernel_request(ce->engine);
1715         if (IS_ERR(rq))
1716                 return PTR_ERR(rq);
1717
1718         fence = i915_active_fence_get(&ce->timeline->last_request);
1719         if (fence) {
1720                 i915_request_await_dma_fence(rq, fence);
1721                 dma_fence_put(fence);
1722         }
1723
1724         rq = i915_request_get(rq);
1725         i915_request_add(rq);
1726         if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
1727                 err = -ETIME;
1728         i915_request_put(rq);
1729
1730         while (!err && !intel_engine_is_idle(ce->engine))
1731                 intel_engine_flush_submission(ce->engine);
1732
1733         return err;
1734 }
1735
1736 struct perf_stats {
1737         struct intel_engine_cs *engine;
1738         unsigned long count;
1739         ktime_t time;
1740         ktime_t busy;
1741         u64 runtime;
1742 };
1743
1744 struct perf_series {
1745         struct drm_i915_private *i915;
1746         unsigned int nengines;
1747         struct intel_context *ce[];
1748 };
1749
1750 static int cmp_u32(const void *A, const void *B)
1751 {
1752         const u32 *a = A, *b = B;
1753
1754         return *a - *b;
1755 }
1756
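/*
 * Filter the 5 samples by sorting them and returning the sum of the middle
 * three with the median counted twice (a[1] + 2 * a[2] + a[3]), i.e. four
 * times a weighted average. Callers undo the scaling with >> TF_BIAS, and
 * cycles_to_ns() divides by 1 << TF_BIAS for the same reason.
 */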
1757 static u32 trifilter(u32 *a)
1758 {
1759         u64 sum;
1760
1761 #define TF_COUNT 5
1762         sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL);
1763
1764         sum = mul_u32_u32(a[2], 2);
1765         sum += a[1];
1766         sum += a[3];
1767
1768         GEM_BUG_ON(sum > U32_MAX);
1769         return sum;
1770 #define TF_BIAS 2
1771 }
1772
1773 static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles)
1774 {
1775         u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles);
1776
1777         return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS);
1778 }
1779
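/*
 * Command-stream helpers for the latency probes below: copy the engine's
 * CS_TIMESTAMP register into a GGTT address, store an immediate dword, and
 * poll a global-GTT semaphore until the comparison selected by mode is
 * satisfied.
 */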
1780 static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset)
1781 {
1782         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1783         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base)));
1784         *cs++ = offset;
1785         *cs++ = 0;
1786
1787         return cs;
1788 }
1789
1790 static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value)
1791 {
1792         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1793         *cs++ = offset;
1794         *cs++ = 0;
1795         *cs++ = value;
1796
1797         return cs;
1798 }
1799
1800 static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset)
1801 {
1802         *cs++ = MI_SEMAPHORE_WAIT |
1803                 MI_SEMAPHORE_GLOBAL_GTT |
1804                 MI_SEMAPHORE_POLL |
1805                 mode;
1806         *cs++ = value;
1807         *cs++ = offset;
1808         *cs++ = 0;
1809
1810         return cs;
1811 }
1812
1813 static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value)
1814 {
1815         return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset);
1816 }
1817
1818 static void semaphore_set(u32 *sema, u32 value)
1819 {
1820         WRITE_ONCE(*sema, value);
1821         wmb(); /* flush the update to the cache, and beyond */
1822 }
1823
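/*
 * Claim a small scratch area (21 dwords, cleared to zero) in the engine's
 * status page (HWSP) for the semaphores and timestamps written by the
 * probes; hwsp_offset() converts a CPU pointer within that page into the
 * GGTT offset used by the GPU commands.
 */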
1824 static u32 *hwsp_scratch(const struct intel_context *ce)
1825 {
1826         return memset32(ce->engine->status_page.addr + 1000, 0, 21);
1827 }
1828
1829 static u32 hwsp_offset(const struct intel_context *ce, u32 *dw)
1830 {
1831         return (i915_ggtt_offset(ce->engine->status_page.vma) +
1832                 offset_in_page(dw));
1833 }
1834
1835 static int measure_semaphore_response(struct intel_context *ce)
1836 {
1837         u32 *sema = hwsp_scratch(ce);
1838         const u32 offset = hwsp_offset(ce, sema);
1839         u32 elapsed[TF_COUNT], cycles;
1840         struct i915_request *rq;
1841         u32 *cs;
1842         int err;
1843         int i;
1844
1845         /*
1846          * Measure how many cycles it takes for the HW to detect the change
1847          * in a semaphore value.
1848          *
1849          *    A: read CS_TIMESTAMP from CPU
1850          *    poke semaphore
1851          *    B: read CS_TIMESTAMP on GPU
1852          *
1853          * Semaphore latency: B - A
1854          */
1855
1856         semaphore_set(sema, -1);
1857
1858         rq = i915_request_create(ce);
1859         if (IS_ERR(rq))
1860                 return PTR_ERR(rq);
1861
1862         cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed));
1863         if (IS_ERR(cs)) {
1864                 i915_request_add(rq);
1865                 err = PTR_ERR(cs);
1866                 goto err;
1867         }
1868
1869         cs = emit_store_dw(cs, offset, 0);
1870         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
1871                 cs = emit_semaphore_poll_until(cs, offset, i);
1872                 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
1873                 cs = emit_store_dw(cs, offset, 0);
1874         }
1875
1876         intel_ring_advance(rq, cs);
1877         i915_request_add(rq);
1878
1879         if (wait_for(READ_ONCE(*sema) == 0, 50)) {
1880                 err = -EIO;
1881                 goto err;
1882         }
1883
1884         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
1885                 preempt_disable();
1886                 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
1887                 semaphore_set(sema, i);
1888                 preempt_enable();
1889
1890                 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
1891                         err = -EIO;
1892                         goto err;
1893                 }
1894
1895                 elapsed[i - 1] = sema[i] - cycles;
1896         }
1897
1898         cycles = trifilter(elapsed);
1899         pr_info("%s: semaphore response %d cycles, %lluns\n",
1900                 ce->engine->name, cycles >> TF_BIAS,
1901                 cycles_to_ns(ce->engine, cycles));
1902
1903         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
1904
1905 err:
1906         intel_gt_set_wedged(ce->engine->gt);
1907         return err;
1908 }
1909
1910 static int measure_idle_dispatch(struct intel_context *ce)
1911 {
1912         u32 *sema = hwsp_scratch(ce);
1913         const u32 offset = hwsp_offset(ce, sema);
1914         u32 elapsed[TF_COUNT], cycles;
1915         u32 *cs;
1916         int err;
1917         int i;
1918
1919         /*
1920          * Measure how long it takes for us to submit a request while the
1921          * engine is idle, but is resting in our context.
1922          *
1923          *    A: read CS_TIMESTAMP from CPU
1924          *    submit request
1925          *    B: read CS_TIMESTAMP on GPU
1926          *
1927          * Submission latency: B - A
1928          */
1929
1930         for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
1931                 struct i915_request *rq;
1932
1933                 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
1934                 if (err)
1935                         return err;
1936
1937                 rq = i915_request_create(ce);
1938                 if (IS_ERR(rq)) {
1939                         err = PTR_ERR(rq);
1940                         goto err;
1941                 }
1942
1943                 cs = intel_ring_begin(rq, 4);
1944                 if (IS_ERR(cs)) {
1945                         i915_request_add(rq);
1946                         err = PTR_ERR(cs);
1947                         goto err;
1948                 }
1949
1950                 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
1951
1952                 intel_ring_advance(rq, cs);
1953
1954                 preempt_disable();
1955                 local_bh_disable();
1956                 elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
1957                 i915_request_add(rq);
1958                 local_bh_enable();
1959                 preempt_enable();
1960         }
1961
1962         err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
1963         if (err)
1964                 goto err;
1965
1966         for (i = 0; i < ARRAY_SIZE(elapsed); i++)
1967                 elapsed[i] = sema[i] - elapsed[i];
1968
1969         cycles = trifilter(elapsed);
1970         pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
1971                 ce->engine->name, cycles >> TF_BIAS,
1972                 cycles_to_ns(ce->engine, cycles));
1973
1974         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
1975
1976 err:
1977         intel_gt_set_wedged(ce->engine->gt);
1978         return err;
1979 }
1980
1981 static int measure_busy_dispatch(struct intel_context *ce)
1982 {
1983         u32 *sema = hwsp_scratch(ce);
1984         const u32 offset = hwsp_offset(ce, sema);
1985         u32 elapsed[TF_COUNT + 1], cycles;
1986         u32 *cs;
1987         int err;
1988         int i;
1989
1990         /*
1991          * Measure how long it takes for us to submit a request while the
1992          * engine is busy, polling on a semaphore in our context. With
1993          * direct submission, this will include the cost of a lite restore.
1994          *
1995          *    A: read CS_TIMESTAMP from CPU
1996          *    submit request
1997          *    B: read CS_TIMESTAMP on GPU
1998          *
1999          * Submission latency: B - A
2000          */
2001
2002         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2003                 struct i915_request *rq;
2004
2005                 rq = i915_request_create(ce);
2006                 if (IS_ERR(rq)) {
2007                         err = PTR_ERR(rq);
2008                         goto err;
2009                 }
2010
2011                 cs = intel_ring_begin(rq, 12);
2012                 if (IS_ERR(cs)) {
2013                         i915_request_add(rq);
2014                         err = PTR_ERR(cs);
2015                         goto err;
2016                 }
2017
2018                 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2019                 cs = emit_semaphore_poll_until(cs, offset, i);
2020                 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2021
2022                 intel_ring_advance(rq, cs);
2023
2024                 if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) {
2025                         err = -EIO;
2026                         goto err;
2027                 }
2028
2029                 preempt_disable();
2030                 local_bh_disable();
2031                 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2032                 i915_request_add(rq);
2033                 local_bh_enable();
2034                 semaphore_set(sema, i - 1);
2035                 preempt_enable();
2036         }
2037
2038         wait_for(READ_ONCE(sema[i - 1]), 500);
2039         semaphore_set(sema, i - 1);
2040
2041         for (i = 1; i <= TF_COUNT; i++) {
2042                 GEM_BUG_ON(sema[i] == -1);
2043                 elapsed[i - 1] = sema[i] - elapsed[i];
2044         }
2045
2046         cycles = trifilter(elapsed);
2047         pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
2048                 ce->engine->name, cycles >> TF_BIAS,
2049                 cycles_to_ns(ce->engine, cycles));
2050
2051         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2052
2053 err:
2054         intel_gt_set_wedged(ce->engine->gt);
2055         return err;
2056 }
2057
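/*
 * Stall the engine behind a kernel-context request that polls a semaphore
 * in the status page until the CPU releases it with semaphore_set(),
 * holding back the requests queued behind it.
 */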
2058 static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value)
2059 {
2060         const u32 offset =
2061                 i915_ggtt_offset(engine->status_page.vma) +
2062                 offset_in_page(sema);
2063         struct i915_request *rq;
2064         u32 *cs;
2065
2066         rq = i915_request_create(engine->kernel_context);
2067         if (IS_ERR(rq))
2068                 return PTR_ERR(rq);
2069
2070         cs = intel_ring_begin(rq, 4);
2071         if (IS_ERR(cs)) {
2072                 i915_request_add(rq);
2073                 return PTR_ERR(cs);
2074         }
2075
2076         cs = emit_semaphore_poll(cs, mode, value, offset);
2077
2078         intel_ring_advance(rq, cs);
2079         i915_request_add(rq);
2080
2081         return 0;
2082 }
2083
2084 static int measure_inter_request(struct intel_context *ce)
2085 {
2086         u32 *sema = hwsp_scratch(ce);
2087         const u32 offset = hwsp_offset(ce, sema);
2088         u32 elapsed[TF_COUNT + 1], cycles;
2089         struct i915_sw_fence *submit;
2090         int i, err;
2091
2092         /*
2093          * Measure how long it takes to advance from one request into the
2094          * next. Between each request we flush the GPU caches to memory,
2095          * update the breadcrumbs, and then invalidate those caches.
2096          * We queue up all the requests to be submitted in one batch so
2097          * it should be one set of contiguous measurements.
2098          *
2099          *    A: read CS_TIMESTAMP on GPU
2100          *    advance request
2101          *    B: read CS_TIMESTAMP on GPU
2102          *
2103          * Request latency: B - A
2104          */
2105
2106         err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2107         if (err)
2108                 return err;
2109
2110         submit = heap_fence_create(GFP_KERNEL);
2111         if (!submit) {
2112                 semaphore_set(sema, 1);
2113                 return -ENOMEM;
2114         }
2115
2116         intel_engine_flush_submission(ce->engine);
2117         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2118                 struct i915_request *rq;
2119                 u32 *cs;
2120
2121                 rq = i915_request_create(ce);
2122                 if (IS_ERR(rq)) {
2123                         err = PTR_ERR(rq);
2124                         goto err_submit;
2125                 }
2126
2127                 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
2128                                                        submit,
2129                                                        GFP_KERNEL);
2130                 if (err < 0) {
2131                         i915_request_add(rq);
2132                         goto err_submit;
2133                 }
2134
2135                 cs = intel_ring_begin(rq, 4);
2136                 if (IS_ERR(cs)) {
2137                         i915_request_add(rq);
2138                         err = PTR_ERR(cs);
2139                         goto err_submit;
2140                 }
2141
2142                 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2143
2144                 intel_ring_advance(rq, cs);
2145                 i915_request_add(rq);
2146         }
2147         i915_sw_fence_commit(submit);
2148         intel_engine_flush_submission(ce->engine);
2149         heap_fence_put(submit);
2150
2151         semaphore_set(sema, 1);
2152         err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2153         if (err)
2154                 goto err;
2155
2156         for (i = 1; i <= TF_COUNT; i++)
2157                 elapsed[i - 1] = sema[i + 1] - sema[i];
2158
2159         cycles = trifilter(elapsed);
2160         pr_info("%s: inter-request latency %d cycles, %lluns\n",
2161                 ce->engine->name, cycles >> TF_BIAS,
2162                 cycles_to_ns(ce->engine, cycles));
2163
2164         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2165
2166 err_submit:
2167         i915_sw_fence_commit(submit);
2168         heap_fence_put(submit);
2169         semaphore_set(sema, 1);
2170 err:
2171         intel_gt_set_wedged(ce->engine->gt);
2172         return err;
2173 }
2174
2175 static int measure_context_switch(struct intel_context *ce)
2176 {
2177         u32 *sema = hwsp_scratch(ce);
2178         const u32 offset = hwsp_offset(ce, sema);
2179         struct i915_request *fence = NULL;
2180         u32 elapsed[TF_COUNT + 1], cycles;
2181         int i, j, err;
2182         u32 *cs;
2183
2184         /*
2185          * Measure how long it takes to advance from one request in one
2186          * context to a request in another context. This allows us to
2187          * measure how long the context save/restore take, along with all
2188          * the inter-context setup we require.
2189          *
2190          *    A: read CS_TIMESTAMP on GPU
2191          *    switch context
2192          *    B: read CS_TIMESTAMP on GPU
2193          *
2194          * Context switch latency: B - A
2195          */
2196
2197         err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2198         if (err)
2199                 return err;
2200
2201         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2202                 struct intel_context *arr[] = {
2203                         ce, ce->engine->kernel_context
2204                 };
2205                 u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32);
2206
2207                 for (j = 0; j < ARRAY_SIZE(arr); j++) {
2208                         struct i915_request *rq;
2209
2210                         rq = i915_request_create(arr[j]);
2211                         if (IS_ERR(rq)) {
2212                                 err = PTR_ERR(rq);
2213                                 goto err_fence;
2214                         }
2215
2216                         if (fence) {
2217                                 err = i915_request_await_dma_fence(rq,
2218                                                                    &fence->fence);
2219                                 if (err) {
2220                                         i915_request_add(rq);
2221                                         goto err_fence;
2222                                 }
2223                         }
2224
2225                         cs = intel_ring_begin(rq, 4);
2226                         if (IS_ERR(cs)) {
2227                                 i915_request_add(rq);
2228                                 err = PTR_ERR(cs);
2229                                 goto err_fence;
2230                         }
2231
2232                         cs = emit_timestamp_store(cs, ce, addr);
2233                         addr += sizeof(u32);
2234
2235                         intel_ring_advance(rq, cs);
2236
2237                         i915_request_put(fence);
2238                         fence = i915_request_get(rq);
2239
2240                         i915_request_add(rq);
2241                 }
2242         }
2243         i915_request_put(fence);
2244         intel_engine_flush_submission(ce->engine);
2245
2246         semaphore_set(sema, 1);
2247         err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2248         if (err)
2249                 goto err;
2250
2251         for (i = 1; i <= TF_COUNT; i++)
2252                 elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1];
2253
2254         cycles = trifilter(elapsed);
2255         pr_info("%s: context switch latency %d cycles, %lluns\n",
2256                 ce->engine->name, cycles >> TF_BIAS,
2257                 cycles_to_ns(ce->engine, cycles));
2258
2259         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2260
2261 err_fence:
2262         i915_request_put(fence);
2263         semaphore_set(sema, 1);
2264 err:
2265         intel_gt_set_wedged(ce->engine->gt);
2266         return err;
2267 }
2268
2269 static int measure_preemption(struct intel_context *ce)
2270 {
2271         u32 *sema = hwsp_scratch(ce);
2272         const u32 offset = hwsp_offset(ce, sema);
2273         u32 elapsed[TF_COUNT], cycles;
2274         u32 *cs;
2275         int err;
2276         int i;
2277
2278         /*
2279          * We measure two latencies while triggering preemption. The first
2280          * latency is how long it takes for us to submit a preempting request.
2281          * The second latency is how long it takes for us to return from the
2282          * preemption back to the original context.
2283          *
2284          *    A: read CS_TIMESTAMP from CPU
2285          *    submit preemption
2286          *    B: read CS_TIMESTAMP on GPU (in preempting context)
2287          *    context switch
2288          *    C: read CS_TIMESTAMP on GPU (in original context)
2289          *
2290          * Preemption dispatch latency: B - A
2291          * Preemption switch latency: C - B
2292          */
2293
2294         if (!intel_engine_has_preemption(ce->engine))
2295                 return 0;
2296
2297         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2298                 u32 addr = offset + 2 * i * sizeof(u32);
2299                 struct i915_request *rq;
2300
2301                 rq = i915_request_create(ce);
2302                 if (IS_ERR(rq)) {
2303                         err = PTR_ERR(rq);
2304                         goto err;
2305                 }
2306
2307                 cs = intel_ring_begin(rq, 12);
2308                 if (IS_ERR(cs)) {
2309                         i915_request_add(rq);
2310                         err = PTR_ERR(cs);
2311                         goto err;
2312                 }
2313
2314                 cs = emit_store_dw(cs, addr, -1);
2315                 cs = emit_semaphore_poll_until(cs, offset, i);
2316                 cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));
2317
2318                 intel_ring_advance(rq, cs);
2319                 i915_request_add(rq);
2320
2321                 if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
2322                         err = -EIO;
2323                         goto err;
2324                 }
2325
2326                 rq = i915_request_create(ce->engine->kernel_context);
2327                 if (IS_ERR(rq)) {
2328                         err = PTR_ERR(rq);
2329                         goto err;
2330                 }
2331
2332                 cs = intel_ring_begin(rq, 8);
2333                 if (IS_ERR(cs)) {
2334                         i915_request_add(rq);
2335                         err = PTR_ERR(cs);
2336                         goto err;
2337                 }
2338
2339                 cs = emit_timestamp_store(cs, ce, addr);
2340                 cs = emit_store_dw(cs, offset, i);
2341
2342                 intel_ring_advance(rq, cs);
2343                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2344
2345                 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2346                 i915_request_add(rq);
2347         }
2348
2349         if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
2350                 err = -EIO;
2351                 goto err;
2352         }
2353
2354         for (i = 1; i <= TF_COUNT; i++)
2355                 elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];
2356
2357         cycles = trifilter(elapsed);
2358         pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
2359                 ce->engine->name, cycles >> TF_BIAS,
2360                 cycles_to_ns(ce->engine, cycles));
2361
2362         for (i = 1; i <= TF_COUNT; i++)
2363                 elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];
2364
2365         cycles = trifilter(elapsed);
2366         pr_info("%s: preemption switch latency %d cycles, %lluns\n",
2367                 ce->engine->name, cycles >> TF_BIAS,
2368                 cycles_to_ns(ce->engine, cycles));
2369
2370         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2371
2372 err:
2373         intel_gt_set_wedged(ce->engine->gt);
2374         return err;
2375 }
2376
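/*
 * Minimal dma-fence callback so measure_completion() can busy-wait until
 * the CPU observes the request being signalled.
 */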
2377 struct signal_cb {
2378         struct dma_fence_cb base;
2379         bool seen;
2380 };
2381
2382 static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
2383 {
2384         struct signal_cb *s = container_of(cb, typeof(*s), base);
2385
2386         smp_store_mb(s->seen, true); /* be safe, be strong */
2387 }
2388
2389 static int measure_completion(struct intel_context *ce)
2390 {
2391         u32 *sema = hwsp_scratch(ce);
2392         const u32 offset = hwsp_offset(ce, sema);
2393         u32 elapsed[TF_COUNT], cycles;
2394         u32 *cs;
2395         int err;
2396         int i;
2397
2398         /*
2399          * Measure how long it takes for the signal (interrupt) to be
2400          * sent from the GPU and then processed by the CPU.
2401          *
2402          *    A: read CS_TIMESTAMP on GPU
2403          *    signal
2404          *    B: read CS_TIMESTAMP from CPU
2405          *
2406          * Completion latency: B - A
2407          */
2408
2409         for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2410                 struct signal_cb cb = { .seen = false };
2411                 struct i915_request *rq;
2412
2413                 rq = i915_request_create(ce);
2414                 if (IS_ERR(rq)) {
2415                         err = PTR_ERR(rq);
2416                         goto err;
2417                 }
2418
2419                 cs = intel_ring_begin(rq, 12);
2420                 if (IS_ERR(cs)) {
2421                         i915_request_add(rq);
2422                         err = PTR_ERR(cs);
2423                         goto err;
2424                 }
2425
2426                 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2427                 cs = emit_semaphore_poll_until(cs, offset, i);
2428                 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2429
2430                 intel_ring_advance(rq, cs);
2431
2432                 dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
2433                 i915_request_add(rq);
2434
2435                 intel_engine_flush_submission(ce->engine);
2436                 if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
2437                         err = -EIO;
2438                         goto err;
2439                 }
2440
2441                 preempt_disable();
2442                 semaphore_set(sema, i);
2443                 while (!READ_ONCE(cb.seen))
2444                         cpu_relax();
2445
2446                 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2447                 preempt_enable();
2448         }
2449
2450         err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2451         if (err)
2452                 goto err;
2453
2454         for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
2455                 GEM_BUG_ON(sema[i + 1] == -1);
2456                 elapsed[i] = elapsed[i] - sema[i + 1];
2457         }
2458
2459         cycles = trifilter(elapsed);
2460         pr_info("%s: completion latency %d cycles, %lluns\n",
2461                 ce->engine->name, cycles >> TF_BIAS,
2462                 cycles_to_ns(ce->engine, cycles));
2463
2464         return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2465
2466 err:
2467         intel_gt_set_wedged(ce->engine->gt);
2468         return err;
2469 }
2470
2471 static void rps_pin(struct intel_gt *gt)
2472 {
2473         /* Pin the frequency to max */
2474         atomic_inc(&gt->rps.num_waiters);
2475         intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
2476
2477         mutex_lock(&gt->rps.lock);
2478         intel_rps_set(&gt->rps, gt->rps.max_freq);
2479         mutex_unlock(&gt->rps.lock);
2480 }
2481
2482 static void rps_unpin(struct intel_gt *gt)
2483 {
2484         intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
2485         atomic_dec(&gt->rps.num_waiters);
2486 }
2487
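/*
 * For each uabi engine, create and pin a fresh context, disable the
 * heartbeat and pin the GPU frequency to maximum, then run the measure_*
 * probes so the CS_TIMESTAMP deltas are not perturbed by background
 * requests or frequency changes.
 */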
2488 static int perf_request_latency(void *arg)
2489 {
2490         struct drm_i915_private *i915 = arg;
2491         struct intel_engine_cs *engine;
2492         struct pm_qos_request qos;
2493         int err = 0;
2494
2495         if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
2496                 return 0;
2497
2498         cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2499
2500         for_each_uabi_engine(engine, i915) {
2501                 struct intel_context *ce;
2502
2503                 ce = intel_context_create(engine);
2504                 if (IS_ERR(ce)) {
2505                         err = PTR_ERR(ce);
2506                         goto out;
2507                 }
2508
2509                 err = intel_context_pin(ce);
2510                 if (err) {
2511                         intel_context_put(ce);
2512                         goto out;
2513                 }
2514
2515                 st_engine_heartbeat_disable(engine);
2516                 rps_pin(engine->gt);
2517
2518                 if (err == 0)
2519                         err = measure_semaphore_response(ce);
2520                 if (err == 0)
2521                         err = measure_idle_dispatch(ce);
2522                 if (err == 0)
2523                         err = measure_busy_dispatch(ce);
2524                 if (err == 0)
2525                         err = measure_inter_request(ce);
2526                 if (err == 0)
2527                         err = measure_context_switch(ce);
2528                 if (err == 0)
2529                         err = measure_preemption(ce);
2530                 if (err == 0)
2531                         err = measure_completion(ce);
2532
2533                 rps_unpin(engine->gt);
2534                 st_engine_heartbeat_enable(engine);
2535
2536                 intel_context_unpin(ce);
2537                 intel_context_put(ce);
2538                 if (err)
2539                         goto out;
2540         }
2541
2542 out:
2543         if (igt_flush_test(i915))
2544                 err = -EIO;
2545
2546         cpu_latency_qos_remove_request(&qos);
2547         return err;
2548 }
2549
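/*
 * Three request patterns, each cycling over every engine from a single
 * thread: s_sync0 waits for each request before issuing the next, s_sync1
 * submits the next request before waiting on the previous one, and s_many
 * submits without waiting at all.
 */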
2550 static int s_sync0(void *arg)
2551 {
2552         struct perf_series *ps = arg;
2553         IGT_TIMEOUT(end_time);
2554         unsigned int idx = 0;
2555         int err = 0;
2556
2557         GEM_BUG_ON(!ps->nengines);
2558         do {
2559                 struct i915_request *rq;
2560
2561                 rq = i915_request_create(ps->ce[idx]);
2562                 if (IS_ERR(rq)) {
2563                         err = PTR_ERR(rq);
2564                         break;
2565                 }
2566
2567                 i915_request_get(rq);
2568                 i915_request_add(rq);
2569
2570                 if (i915_request_wait(rq, 0, HZ / 5) < 0)
2571                         err = -ETIME;
2572                 i915_request_put(rq);
2573                 if (err)
2574                         break;
2575
2576                 if (++idx == ps->nengines)
2577                         idx = 0;
2578         } while (!__igt_timeout(end_time, NULL));
2579
2580         return err;
2581 }
2582
2583 static int s_sync1(void *arg)
2584 {
2585         struct perf_series *ps = arg;
2586         struct i915_request *prev = NULL;
2587         IGT_TIMEOUT(end_time);
2588         unsigned int idx = 0;
2589         int err = 0;
2590
2591         GEM_BUG_ON(!ps->nengines);
2592         do {
2593                 struct i915_request *rq;
2594
2595                 rq = i915_request_create(ps->ce[idx]);
2596                 if (IS_ERR(rq)) {
2597                         err = PTR_ERR(rq);
2598                         break;
2599                 }
2600
2601                 i915_request_get(rq);
2602                 i915_request_add(rq);
2603
2604                 if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
2605                         err = -ETIME;
2606                 i915_request_put(prev);
2607                 prev = rq;
2608                 if (err)
2609                         break;
2610
2611                 if (++idx == ps->nengines)
2612                         idx = 0;
2613         } while (!__igt_timeout(end_time, NULL));
2614         i915_request_put(prev);
2615
2616         return err;
2617 }
2618
2619 static int s_many(void *arg)
2620 {
2621         struct perf_series *ps = arg;
2622         IGT_TIMEOUT(end_time);
2623         unsigned int idx = 0;
2624
2625         GEM_BUG_ON(!ps->nengines);
2626         do {
2627                 struct i915_request *rq;
2628
2629                 rq = i915_request_create(ps->ce[idx]);
2630                 if (IS_ERR(rq))
2631                         return PTR_ERR(rq);
2632
2633                 i915_request_add(rq);
2634
2635                 if (++idx == ps->nengines)
2636                         idx = 0;
2637         } while (!__igt_timeout(end_time, NULL));
2638
2639         return 0;
2640 }
2641
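/*
 * Run each of the s_* workloads over all engines in series, sampling
 * engine busy-time and per-context runtime around each run so we can
 * report busyness, runtime and walltime for every engine.
 */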
2642 static int perf_series_engines(void *arg)
2643 {
2644         struct drm_i915_private *i915 = arg;
2645         static int (* const func[])(void *arg) = {
2646                 s_sync0,
2647                 s_sync1,
2648                 s_many,
2649                 NULL,
2650         };
2651         const unsigned int nengines = num_uabi_engines(i915);
2652         struct intel_engine_cs *engine;
2653         int (* const *fn)(void *arg);
2654         struct pm_qos_request qos;
2655         struct perf_stats *stats;
2656         struct perf_series *ps;
2657         unsigned int idx;
2658         int err = 0;
2659
2660         stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
2661         if (!stats)
2662                 return -ENOMEM;
2663
2664         ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
2665         if (!ps) {
2666                 kfree(stats);
2667                 return -ENOMEM;
2668         }
2669
2670         cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2671
2672         ps->i915 = i915;
2673         ps->nengines = nengines;
2674
2675         idx = 0;
2676         for_each_uabi_engine(engine, i915) {
2677                 struct intel_context *ce;
2678
2679                 ce = intel_context_create(engine);
2680                 if (IS_ERR(ce)) {
2681                         err = PTR_ERR(ce);
2682                         goto out;
2683                 }
2684
2685                 err = intel_context_pin(ce);
2686                 if (err) {
2687                         intel_context_put(ce);
2688                         goto out;
2689                 }
2690
2691                 ps->ce[idx++] = ce;
2692         }
2693         GEM_BUG_ON(idx != ps->nengines);
2694
2695         for (fn = func; *fn && !err; fn++) {
2696                 char name[KSYM_NAME_LEN];
2697                 struct igt_live_test t;
2698
2699                 snprintf(name, sizeof(name), "%ps", *fn);
2700                 err = igt_live_test_begin(&t, i915, __func__, name);
2701                 if (err)
2702                         break;
2703
2704                 for (idx = 0; idx < nengines; idx++) {
2705                         struct perf_stats *p =
2706                                 memset(&stats[idx], 0, sizeof(stats[idx]));
2707                         struct intel_context *ce = ps->ce[idx];
2708
2709                         p->engine = ps->ce[idx]->engine;
2710                         intel_engine_pm_get(p->engine);
2711
2712                         if (intel_engine_supports_stats(p->engine))
2713                                 p->busy = intel_engine_get_busy_time(p->engine,
2714                                                                      &p->time) + 1;
2715                         else
2716                                 p->time = ktime_get();
2717                         p->runtime = -intel_context_get_total_runtime_ns(ce);
2718                 }
2719
2720                 err = (*fn)(ps);
2721                 if (igt_live_test_end(&t))
2722                         err = -EIO;
2723
2724                 for (idx = 0; idx < nengines; idx++) {
2725                         struct perf_stats *p = &stats[idx];
2726                         struct intel_context *ce = ps->ce[idx];
2727                         int integer, decimal;
2728                         u64 busy, dt, now;
2729
2730                         if (p->busy)
2731                                 p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
2732                                                                                &now),
2733                                                     p->busy - 1);
2734                         else
2735                                 now = ktime_get();
2736                         p->time = ktime_sub(now, p->time);
2737
2738                         err = switch_to_kernel_sync(ce, err);
2739                         p->runtime += intel_context_get_total_runtime_ns(ce);
2740                         intel_engine_pm_put(p->engine);
2741
2742                         busy = 100 * ktime_to_ns(p->busy);
2743                         dt = ktime_to_ns(p->time);
2744                         if (dt) {
2745                                 integer = div64_u64(busy, dt);
2746                                 busy -= integer * dt;
2747                                 decimal = div64_u64(100 * busy, dt);
2748                         } else {
2749                                 integer = 0;
2750                                 decimal = 0;
2751                         }
2752
2753                         pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
2754                                 name, p->engine->name, ce->timeline->seqno,
2755                                 integer, decimal,
2756                                 div_u64(p->runtime, 1000 * 1000),
2757                                 div_u64(ktime_to_ns(p->time), 1000 * 1000));
2758                 }
2759         }
2760
2761 out:
2762         for (idx = 0; idx < nengines; idx++) {
2763                 if (IS_ERR_OR_NULL(ps->ce[idx]))
2764                         break;
2765
2766                 intel_context_unpin(ps->ce[idx]);
2767                 intel_context_put(ps->ce[idx]);
2768         }
2769         kfree(ps);
2770
2771         cpu_latency_qos_remove_request(&qos);
2772         kfree(stats);
2773         return err;
2774 }
2775
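/*
 * Per-engine workloads for perf_parallel_engines(), one kthread per
 * engine: p_sync0 waits on every request, p_sync1 pipelines by one
 * request, and p_many submits without waiting; each records the engine
 * busy-time and context runtime over the run.
 */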
2776 static int p_sync0(void *arg)
2777 {
2778         struct perf_stats *p = arg;
2779         struct intel_engine_cs *engine = p->engine;
2780         struct intel_context *ce;
2781         IGT_TIMEOUT(end_time);
2782         unsigned long count;
2783         bool busy;
2784         int err = 0;
2785
2786         ce = intel_context_create(engine);
2787         if (IS_ERR(ce))
2788                 return PTR_ERR(ce);
2789
2790         err = intel_context_pin(ce);
2791         if (err) {
2792                 intel_context_put(ce);
2793                 return err;
2794         }
2795
2796         if (intel_engine_supports_stats(engine)) {
2797                 p->busy = intel_engine_get_busy_time(engine, &p->time);
2798                 busy = true;
2799         } else {
2800                 p->time = ktime_get();
2801                 busy = false;
2802         }
2803
2804         count = 0;
2805         do {
2806                 struct i915_request *rq;
2807
2808                 rq = i915_request_create(ce);
2809                 if (IS_ERR(rq)) {
2810                         err = PTR_ERR(rq);
2811                         break;
2812                 }
2813
2814                 i915_request_get(rq);
2815                 i915_request_add(rq);
2816
2817                 err = 0;
2818                 if (i915_request_wait(rq, 0, HZ) < 0)
2819                         err = -ETIME;
2820                 i915_request_put(rq);
2821                 if (err)
2822                         break;
2823
2824                 count++;
2825         } while (!__igt_timeout(end_time, NULL));
2826
2827         if (busy) {
2828                 ktime_t now;
2829
2830                 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2831                                     p->busy);
2832                 p->time = ktime_sub(now, p->time);
2833         } else {
2834                 p->time = ktime_sub(ktime_get(), p->time);
2835         }
2836
2837         err = switch_to_kernel_sync(ce, err);
2838         p->runtime = intel_context_get_total_runtime_ns(ce);
2839         p->count = count;
2840
2841         intel_context_unpin(ce);
2842         intel_context_put(ce);
2843         return err;
2844 }
2845
2846 static int p_sync1(void *arg)
2847 {
2848         struct perf_stats *p = arg;
2849         struct intel_engine_cs *engine = p->engine;
2850         struct i915_request *prev = NULL;
2851         struct intel_context *ce;
2852         IGT_TIMEOUT(end_time);
2853         unsigned long count;
2854         bool busy;
2855         int err = 0;
2856
2857         ce = intel_context_create(engine);
2858         if (IS_ERR(ce))
2859                 return PTR_ERR(ce);
2860
2861         err = intel_context_pin(ce);
2862         if (err) {
2863                 intel_context_put(ce);
2864                 return err;
2865         }
2866
2867         if (intel_engine_supports_stats(engine)) {
2868                 p->busy = intel_engine_get_busy_time(engine, &p->time);
2869                 busy = true;
2870         } else {
2871                 p->time = ktime_get();
2872                 busy = false;
2873         }
2874
2875         count = 0;
2876         do {
2877                 struct i915_request *rq;
2878
2879                 rq = i915_request_create(ce);
2880                 if (IS_ERR(rq)) {
2881                         err = PTR_ERR(rq);
2882                         break;
2883                 }
2884
2885                 i915_request_get(rq);
2886                 i915_request_add(rq);
2887
2888                 err = 0;
2889                 if (prev && i915_request_wait(prev, 0, HZ) < 0)
2890                         err = -ETIME;
2891                 i915_request_put(prev);
2892                 prev = rq;
2893                 if (err)
2894                         break;
2895
2896                 count++;
2897         } while (!__igt_timeout(end_time, NULL));
2898         i915_request_put(prev);
2899
2900         if (busy) {
2901                 ktime_t now;
2902
2903                 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2904                                     p->busy);
2905                 p->time = ktime_sub(now, p->time);
2906         } else {
2907                 p->time = ktime_sub(ktime_get(), p->time);
2908         }
2909
2910         err = switch_to_kernel_sync(ce, err);
2911         p->runtime = intel_context_get_total_runtime_ns(ce);
2912         p->count = count;
2913
2914         intel_context_unpin(ce);
2915         intel_context_put(ce);
2916         return err;
2917 }
2918
2919 static int p_many(void *arg)
2920 {
2921         struct perf_stats *p = arg;
2922         struct intel_engine_cs *engine = p->engine;
2923         struct intel_context *ce;
2924         IGT_TIMEOUT(end_time);
2925         unsigned long count;
2926         int err = 0;
2927         bool busy;
2928
2929         ce = intel_context_create(engine);
2930         if (IS_ERR(ce))
2931                 return PTR_ERR(ce);
2932
2933         err = intel_context_pin(ce);
2934         if (err) {
2935                 intel_context_put(ce);
2936                 return err;
2937         }
2938
2939         if (intel_engine_supports_stats(engine)) {
2940                 p->busy = intel_engine_get_busy_time(engine, &p->time);
2941                 busy = true;
2942         } else {
2943                 p->time = ktime_get();
2944                 busy = false;
2945         }
2946
2947         count = 0;
2948         do {
2949                 struct i915_request *rq;
2950
2951                 rq = i915_request_create(ce);
2952                 if (IS_ERR(rq)) {
2953                         err = PTR_ERR(rq);
2954                         break;
2955                 }
2956
2957                 i915_request_add(rq);
2958                 count++;
2959         } while (!__igt_timeout(end_time, NULL));
2960
2961         if (busy) {
2962                 ktime_t now;
2963
2964                 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2965                                     p->busy);
2966                 p->time = ktime_sub(now, p->time);
2967         } else {
2968                 p->time = ktime_sub(ktime_get(), p->time);
2969         }
2970
2971         err = switch_to_kernel_sync(ce, err);
2972         p->runtime = intel_context_get_total_runtime_ns(ce);
2973         p->count = count;
2974
2975         intel_context_unpin(ce);
2976         intel_context_put(ce);
2977         return err;
2978 }
2979
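/*
 * The parallel counterpart to perf_series_engines(): spawn one kthread per
 * engine running the p_* workloads concurrently, then report per-engine
 * throughput and busyness.
 */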
2980 static int perf_parallel_engines(void *arg)
2981 {
2982         struct drm_i915_private *i915 = arg;
2983         static int (* const func[])(void *arg) = {
2984                 p_sync0,
2985                 p_sync1,
2986                 p_many,
2987                 NULL,
2988         };
2989         const unsigned int nengines = num_uabi_engines(i915);
2990         struct intel_engine_cs *engine;
2991         int (* const *fn)(void *arg);
2992         struct pm_qos_request qos;
2993         struct {
2994                 struct perf_stats p;
2995                 struct task_struct *tsk;
2996         } *engines;
2997         int err = 0;
2998
2999         engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
3000         if (!engines)
3001                 return -ENOMEM;
3002
3003         cpu_latency_qos_add_request(&qos, 0);
3004
3005         for (fn = func; *fn; fn++) {
3006                 char name[KSYM_NAME_LEN];
3007                 struct igt_live_test t;
3008                 unsigned int idx;
3009
3010                 snprintf(name, sizeof(name), "%ps", *fn);
3011                 err = igt_live_test_begin(&t, i915, __func__, name);
3012                 if (err)
3013                         break;
3014
3015                 atomic_set(&i915->selftest.counter, nengines);
3016
3017                 idx = 0;
3018                 for_each_uabi_engine(engine, i915) {
3019                         intel_engine_pm_get(engine);
3020
3021                         memset(&engines[idx].p, 0, sizeof(engines[idx].p));
3022                         engines[idx].p.engine = engine;
3023
3024                         engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
3025                                                        "igt:%s", engine->name);
3026                         if (IS_ERR(engines[idx].tsk)) {
3027                                 err = PTR_ERR(engines[idx].tsk);
3028                                 intel_engine_pm_put(engine);
3029                                 break;
3030                         }
3031                         get_task_struct(engines[idx++].tsk);
3032                 }
3033
3034                 yield(); /* start all threads before we kthread_stop() */
3035
3036                 idx = 0;
3037                 for_each_uabi_engine(engine, i915) {
3038                         int status;
3039
3040                         if (IS_ERR(engines[idx].tsk))
3041                                 break;
3042
3043                         status = kthread_stop(engines[idx].tsk);
3044                         if (status && !err)
3045                                 err = status;
3046
3047                         intel_engine_pm_put(engine);
3048                         put_task_struct(engines[idx++].tsk);
3049                 }
3050
3051                 if (igt_live_test_end(&t))
3052                         err = -EIO;
3053                 if (err)
3054                         break;
3055
3056                 idx = 0;
3057                 for_each_uabi_engine(engine, i915) {
3058                         struct perf_stats *p = &engines[idx].p;
3059                         u64 busy = 100 * ktime_to_ns(p->busy);
3060                         u64 dt = ktime_to_ns(p->time);
3061                         int integer, decimal;
3062
3063                         if (dt) {
3064                                 integer = div64_u64(busy, dt);
3065                                 busy -= integer * dt;
3066                                 decimal = div64_u64(100 * busy, dt);
3067                         } else {
3068                                 integer = 0;
3069                                 decimal = 0;
3070                         }
3071
3072                         GEM_BUG_ON(engine != p->engine);
3073                         pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
3074                                 name, engine->name, p->count, integer, decimal,
3075                                 div_u64(p->runtime, 1000 * 1000),
3076                                 div_u64(ktime_to_ns(p->time), 1000 * 1000));
3077                         idx++;
3078                 }
3079         }
3080
3081         cpu_latency_qos_remove_request(&qos);
3082         kfree(engines);
3083         return err;
3084 }
3085
3086 int i915_request_perf_selftests(struct drm_i915_private *i915)
3087 {
3088         static const struct i915_subtest tests[] = {
3089                 SUBTEST(perf_request_latency),
3090                 SUBTEST(perf_series_engines),
3091                 SUBTEST(perf_parallel_engines),
3092         };
3093
3094         if (intel_gt_is_wedged(to_gt(i915)))
3095                 return 0;
3096
3097         return i915_subtests(tests, i915);
3098 }