]> Git Repo - linux.git/blob - drivers/gpu/drm/i915/selftests/i915_perf.c
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux...
[linux.git] / drivers / gpu / drm / i915 / selftests / i915_perf.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6
7 #include <linux/kref.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_gt.h"
11
12 #include "i915_selftest.h"
13
14 #include "igt_flush_test.h"
15 #include "lib_sw_fence.h"
16
17 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
18
19 static int
20 alloc_empty_config(struct i915_perf *perf)
21 {
22         struct i915_oa_config *oa_config;
23
24         oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
25         if (!oa_config)
26                 return -ENOMEM;
27
28         oa_config->perf = perf;
29         kref_init(&oa_config->ref);
30
31         strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
32
33         mutex_lock(&perf->metrics_lock);
34
35         oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
36         if (oa_config->id < 0)  {
37                 mutex_unlock(&perf->metrics_lock);
38                 i915_oa_config_put(oa_config);
39                 return -ENOMEM;
40         }
41
42         mutex_unlock(&perf->metrics_lock);
43
44         return 0;
45 }
46
47 static void
48 destroy_empty_config(struct i915_perf *perf)
49 {
50         struct i915_oa_config *oa_config = NULL, *tmp;
51         int id;
52
53         mutex_lock(&perf->metrics_lock);
54
55         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
56                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
57                         oa_config = tmp;
58                         break;
59                 }
60         }
61
62         if (oa_config)
63                 idr_remove(&perf->metrics_idr, oa_config->id);
64
65         mutex_unlock(&perf->metrics_lock);
66
67         if (oa_config)
68                 i915_oa_config_put(oa_config);
69 }
70
71 static struct i915_oa_config *
72 get_empty_config(struct i915_perf *perf)
73 {
74         struct i915_oa_config *oa_config = NULL, *tmp;
75         int id;
76
77         mutex_lock(&perf->metrics_lock);
78
79         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
80                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
81                         oa_config = i915_oa_config_get(tmp);
82                         break;
83                 }
84         }
85
86         mutex_unlock(&perf->metrics_lock);
87
88         return oa_config;
89 }
90
91 static struct i915_perf_stream *
92 test_stream(struct i915_perf *perf)
93 {
94         struct drm_i915_perf_open_param param = {};
95         struct i915_oa_config *oa_config = get_empty_config(perf);
96         struct perf_open_properties props = {
97                 .engine = intel_engine_lookup_user(perf->i915,
98                                                    I915_ENGINE_CLASS_RENDER,
99                                                    0),
100                 .sample_flags = SAMPLE_OA_REPORT,
101                 .oa_format = GRAPHICS_VER(perf->i915) == 12 ?
102                 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
103         };
104         struct i915_perf_stream *stream;
105         struct intel_gt *gt;
106
107         if (!props.engine)
108                 return NULL;
109
110         gt = props.engine->gt;
111
112         if (!oa_config)
113                 return NULL;
114
115         props.metrics_set = oa_config->id;
116
117         stream = kzalloc(sizeof(*stream), GFP_KERNEL);
118         if (!stream) {
119                 i915_oa_config_put(oa_config);
120                 return NULL;
121         }
122
123         stream->perf = perf;
124
125         mutex_lock(&gt->perf.lock);
126         if (i915_oa_stream_init(stream, &param, &props)) {
127                 kfree(stream);
128                 stream =  NULL;
129         }
130         mutex_unlock(&gt->perf.lock);
131
132         i915_oa_config_put(oa_config);
133
134         return stream;
135 }
136
137 static void stream_destroy(struct i915_perf_stream *stream)
138 {
139         struct intel_gt *gt = stream->engine->gt;
140
141         mutex_lock(&gt->perf.lock);
142         i915_perf_destroy_locked(stream);
143         mutex_unlock(&gt->perf.lock);
144 }
145
146 static int live_sanitycheck(void *arg)
147 {
148         struct drm_i915_private *i915 = arg;
149         struct i915_perf_stream *stream;
150
151         /* Quick check we can create a perf stream */
152
153         stream = test_stream(&i915->perf);
154         if (!stream)
155                 return -EINVAL;
156
157         stream_destroy(stream);
158         return 0;
159 }
160
161 static int write_timestamp(struct i915_request *rq, int slot)
162 {
163         u32 *cs;
164         int len;
165
166         cs = intel_ring_begin(rq, 6);
167         if (IS_ERR(cs))
168                 return PTR_ERR(cs);
169
170         len = 5;
171         if (GRAPHICS_VER(rq->engine->i915) >= 8)
172                 len++;
173
174         *cs++ = GFX_OP_PIPE_CONTROL(len);
175         *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
176                 PIPE_CONTROL_STORE_DATA_INDEX |
177                 PIPE_CONTROL_WRITE_TIMESTAMP;
178         *cs++ = slot * sizeof(u32);
179         *cs++ = 0;
180         *cs++ = 0;
181         *cs++ = 0;
182
183         intel_ring_advance(rq, cs);
184
185         return 0;
186 }
187
188 static ktime_t poll_status(struct i915_request *rq, int slot)
189 {
190         while (!intel_read_status_page(rq->engine, slot) &&
191                !i915_request_completed(rq))
192                 cpu_relax();
193
194         return ktime_get();
195 }
196
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delays matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* The delay the stream's noa_wait batch is programmed to implement */
	expected = atomic64_read(&stream->perf->noa_programming_delay);

	/* write_timestamp() uses PIPE_CONTROL; restrict to the render engine */
	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	/* Clear the status-page slots (0x100..0x103) used below */
	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	/* GPU timestamp before the delay batch... */
	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...execute the noa_wait delay batch... */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...and GPU timestamp after it */
	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* CPU-side estimate: wall time between the two slot writes landing */
	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	/* GPU-side delta between the two timestamps, converted to ns */
	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	/* Accept a measured delay within [75%, 150%] of the expected value */
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
288
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* MMIO offset of the engine's first CS general-purpose register */
	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = __px_vaddr(ce->vm->scratch[0]);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	/* One LRI writing STACK_MAGIC into each of the 32 GPR dwords */
	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		/*
		 * NOTE(review): the cmd++ bumps the command's length field
		 * for gen8+, presumably because the gen8+ encoding of
		 * MI_STORE_REGISTER_MEM takes an extra (upper address)
		 * dword — the trailing zero below — confirm against the
		 * command-streamer reference.
		 */
		cmd = MI_STORE_REGISTER_MEM;
		if (GRAPHICS_VER(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	/* Wait up to 500ms; on timeout wedge the GT so the test can recover */
	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPR contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}
419
420 int i915_perf_live_selftests(struct drm_i915_private *i915)
421 {
422         static const struct i915_subtest tests[] = {
423                 SUBTEST(live_sanitycheck),
424                 SUBTEST(live_noa_delay),
425                 SUBTEST(live_noa_gpr),
426         };
427         struct i915_perf *perf = &i915->perf;
428         int err;
429
430         if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
431                 return 0;
432
433         if (intel_gt_is_wedged(to_gt(i915)))
434                 return 0;
435
436         err = alloc_empty_config(&i915->perf);
437         if (err)
438                 return err;
439
440         err = i915_live_subtests(tests, i915);
441
442         destroy_empty_config(&i915->perf);
443
444         return err;
445 }
This page took 0.059722 seconds and 4 git commands to generate.