]> Git Repo - linux.git/blob - drivers/gpu/drm/i915/selftests/i915_perf.c
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux...
[linux.git] / drivers / gpu / drm / i915 / selftests / i915_perf.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6
7 #include <linux/kref.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_gt.h"
11
12 #include "i915_selftest.h"
13
14 #include "igt_flush_test.h"
15 #include "lib_sw_fence.h"
16
17 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
18
19 static int
20 alloc_empty_config(struct i915_perf *perf)
21 {
22         struct i915_oa_config *oa_config;
23
24         oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
25         if (!oa_config)
26                 return -ENOMEM;
27
28         oa_config->perf = perf;
29         kref_init(&oa_config->ref);
30
31         strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
32
33         mutex_lock(&perf->metrics_lock);
34
35         oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
36         if (oa_config->id < 0)  {
37                 mutex_unlock(&perf->metrics_lock);
38                 i915_oa_config_put(oa_config);
39                 return -ENOMEM;
40         }
41
42         mutex_unlock(&perf->metrics_lock);
43
44         return 0;
45 }
46
47 static void
48 destroy_empty_config(struct i915_perf *perf)
49 {
50         struct i915_oa_config *oa_config = NULL, *tmp;
51         int id;
52
53         mutex_lock(&perf->metrics_lock);
54
55         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
56                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
57                         oa_config = tmp;
58                         break;
59                 }
60         }
61
62         if (oa_config)
63                 idr_remove(&perf->metrics_idr, oa_config->id);
64
65         mutex_unlock(&perf->metrics_lock);
66
67         if (oa_config)
68                 i915_oa_config_put(oa_config);
69 }
70
71 static struct i915_oa_config *
72 get_empty_config(struct i915_perf *perf)
73 {
74         struct i915_oa_config *oa_config = NULL, *tmp;
75         int id;
76
77         mutex_lock(&perf->metrics_lock);
78
79         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
80                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
81                         oa_config = i915_oa_config_get(tmp);
82                         break;
83                 }
84         }
85
86         mutex_unlock(&perf->metrics_lock);
87
88         return oa_config;
89 }
90
91 static struct i915_perf_stream *
92 test_stream(struct i915_perf *perf)
93 {
94         struct drm_i915_perf_open_param param = {};
95         struct i915_oa_config *oa_config = get_empty_config(perf);
96         struct perf_open_properties props = {
97                 .engine = intel_engine_lookup_user(perf->i915,
98                                                    I915_ENGINE_CLASS_RENDER,
99                                                    0),
100                 .sample_flags = SAMPLE_OA_REPORT,
101                 .oa_format = GRAPHICS_VER(perf->i915) == 12 ?
102                 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
103         };
104         struct i915_perf_stream *stream;
105         struct intel_gt *gt;
106
107         if (!props.engine)
108                 return NULL;
109
110         gt = props.engine->gt;
111
112         if (!oa_config)
113                 return NULL;
114
115         props.metrics_set = oa_config->id;
116
117         stream = kzalloc(sizeof(*stream), GFP_KERNEL);
118         if (!stream) {
119                 i915_oa_config_put(oa_config);
120                 return NULL;
121         }
122
123         stream->perf = perf;
124
125         mutex_lock(&gt->perf.lock);
126         if (i915_oa_stream_init(stream, &param, &props)) {
127                 kfree(stream);
128                 stream =  NULL;
129         }
130         mutex_unlock(&gt->perf.lock);
131
132         i915_oa_config_put(oa_config);
133
134         return stream;
135 }
136
137 static void stream_destroy(struct i915_perf_stream *stream)
138 {
139         struct intel_gt *gt = stream->engine->gt;
140
141         mutex_lock(&gt->perf.lock);
142         i915_perf_destroy_locked(stream);
143         mutex_unlock(&gt->perf.lock);
144 }
145
146 static int live_sanitycheck(void *arg)
147 {
148         struct drm_i915_private *i915 = arg;
149         struct i915_perf_stream *stream;
150
151         /* Quick check we can create a perf stream */
152
153         stream = test_stream(&i915->perf);
154         if (!stream)
155                 return -EINVAL;
156
157         stream_destroy(stream);
158         return 0;
159 }
160
161 static int write_timestamp(struct i915_request *rq, int slot)
162 {
163         u32 *cs;
164         int len;
165
166         cs = intel_ring_begin(rq, 6);
167         if (IS_ERR(cs))
168                 return PTR_ERR(cs);
169
170         len = 5;
171         if (GRAPHICS_VER(rq->engine->i915) >= 8)
172                 len++;
173
174         *cs++ = GFX_OP_PIPE_CONTROL(len);
175         *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
176                 PIPE_CONTROL_STORE_DATA_INDEX |
177                 PIPE_CONTROL_WRITE_TIMESTAMP;
178         *cs++ = slot * sizeof(u32);
179         *cs++ = 0;
180         *cs++ = 0;
181         *cs++ = 0;
182
183         intel_ring_advance(rq, cs);
184
185         return 0;
186 }
187
188 static ktime_t poll_status(struct i915_request *rq, int slot)
189 {
190         while (!intel_read_status_page(rq->engine, slot) &&
191                !i915_request_completed(rq))
192                 cpu_relax();
193
194         return ktime_get();
195 }
196
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delays matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* The delay the stream's noa_wait batch is programmed to implement */
	expected = atomic64_read(&stream->perf->noa_programming_delay);

	/* write_timestamp() uses PIPE_CONTROL; restrict to the render engine */
	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	/* Clear the status-page slots (0x100..0x103) used below */
	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	/* GPU timestamp before the delay batch... */
	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...execute the noa_wait delay batch... */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* ...and GPU timestamp after it */
	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* CPU-side estimate: wall time between the two slot writes landing */
	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	/* GPU-side delta between the two timestamps, converted to ns */
	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	/* Accept a measured delay within [75%, 150%] of the expected value */
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
288
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* MMIO offset of the engine's first CS general-purpose register */
	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = __px_vaddr(ce->vm->scratch[0]);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	/* One LRI writing STACK_MAGIC into each of the 32 GPR dwords */
	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		/*
		 * NOTE(review): the cmd++ bumps the command's length field
		 * for gen8+, presumably because the gen8+ encoding of
		 * MI_STORE_REGISTER_MEM takes an extra (upper address)
		 * dword — the trailing zero below — confirm against the
		 * command-streamer reference.
		 */
		cmd = MI_STORE_REGISTER_MEM;
		if (GRAPHICS_VER(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	/* Wait up to 500ms; on timeout wedge the GT so the test can recover */
	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPR contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}
419
420 int i915_perf_live_selftests(struct drm_i915_private *i915)
421 {
422         static const struct i915_subtest tests[] = {
423                 SUBTEST(live_sanitycheck),
424                 SUBTEST(live_noa_delay),
425                 SUBTEST(live_noa_gpr),
426         };
427         struct i915_perf *perf = &i915->perf;
428         int err;
429
430         if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
431                 return 0;
432
433         if (intel_gt_is_wedged(to_gt(i915)))
434                 return 0;
435
436         err = alloc_empty_config(&i915->perf);
437         if (err)
438                 return err;
439
440         err = i915_live_subtests(tests, i915);
441
442         destroy_empty_config(&i915->perf);
443
444         return err;
445 }
This page took 0.059722 seconds and 4 git commands to generate.