// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
        const u64 *a = A, *b = B;

        if (*a < *b)
                return -1;
        else if (*a > *b)
                return 1;
        else
                return 0;
}

static int cmp_u32(const void *A, const void *B)
{
        const u32 *a = A, *b = B;

        if (*a < *b)
                return -1;
        else if (*a > *b)
                return 1;
        else
                return 0;
}

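/*
 * Build a batch that spins forever, incrementing a counter in a CS_GPR
 * register (and, for srm, also storing it to the last dword of the
 * buffer). *cancel points at the loop entry so the caller can terminate
 * the batch by writing MI_BATCH_BUFFER_END there; the rate of *counter
 * tracks the engine clock.
 */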
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
                    struct i915_address_space *vm,
                    bool srm,
                    u32 **cancel,
                    u32 **counter)
{
        enum {
                COUNT,
                INC,
                __NGPR__,
        };
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        unsigned long end;
        u32 *base, *cs;
        int loop, i;
        int err;

        obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        end = obj->base.size / sizeof(u32) - 1;

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_put;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err_unlock;

        i915_vma_lock(vma);

        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(base)) {
                err = PTR_ERR(base);
                goto err_unpin;
        }
        cs = base;

        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
        for (i = 0; i < __NGPR__; i++) {
                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
                *cs++ = 0;
                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
                *cs++ = 0;
        }

        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
        *cs++ = 1;

        loop = cs - base;

        /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
        for (i = 0; i < 1024; i++) {
                *cs++ = MI_MATH(4);
                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
                *cs++ = MI_MATH_ADD;
                *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

                if (srm) {
                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
                        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
                        *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
                        *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
                }
        }

        *cs++ = MI_BATCH_BUFFER_START_GEN8;
        *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
        *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
        GEM_BUG_ON(cs - base > end);

        i915_gem_object_flush_map(obj);

        *cancel = base + loop;
        *counter = srm ? memset32(base + end, 0, 1) : NULL;
        return vma;

err_unpin:
        i915_vma_unpin(vma);
err_unlock:
        i915_vma_unlock(vma);
err_put:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

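/*
 * Poll the actual frequency (CAGF) with exponential backoff until it
 * reaches the target, stops changing over the last 64 samples, or the
 * timeout expires.
 */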
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
        u8 history[64], i;
        unsigned long end;
        int sleep;

        i = 0;
        memset(history, freq, sizeof(history));
        sleep = 20;

        /* The PCU does not change instantly, but drifts towards the goal? */
        end = jiffies + msecs_to_jiffies(timeout_ms);
        do {
                u8 act;

                act = read_cagf(rps);
                if (time_after(jiffies, end))
                        return act;

                /* Target acquired */
                if (act == freq)
                        return act;

                /* Any change within the last N samples? */
                if (!memchr_inv(history, act, sizeof(history)))
                        return act;

                history[i] = act;
                i = (i + 1) % ARRAY_SIZE(history);

                usleep_range(sleep, 2 * sleep);
                sleep *= 2;
                if (sleep > timeout_ms * 20)
                        sleep = timeout_ms * 20;
        } while (1);
}

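/*
 * Request a frequency change and return the actual frequency we settle
 * at, or 0 if the request could not be applied.
 */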
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
        mutex_lock(&rps->lock);
        GEM_BUG_ON(!intel_rps_is_active(rps));
        if (wait_for(!intel_rps_set(rps, freq), 50)) {
                mutex_unlock(&rps->lock);
                return 0;
        }
        GEM_BUG_ON(rps->last_freq != freq);
        mutex_unlock(&rps->lock);

        return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
        struct drm_i915_private *i915 = rps_to_i915(rps);

        if (IS_BROXTON(i915)) {
                pr_info("P_STATE_CAP[%x]: 0x%08x\n",
                        i915_mmio_reg_offset(BXT_RP_STATE_CAP),
                        intel_uncore_read(rps_to_uncore(rps),
                                          BXT_RP_STATE_CAP));
        } else if (GRAPHICS_VER(i915) == 9) {
                pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
                        i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
                        intel_uncore_read(rps_to_uncore(rps),
                                          GEN9_RP_STATE_LIMITS));
        }
}

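/*
 * Verify the reported GT clock frequency: count RPS evaluation-interval
 * cycles over a measured walltime and check that the cycles<->ns
 * conversions agree with walltime to within 25%.
 */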
int live_rps_clock_interval(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        intel_gt_pm_get(gt);
        intel_rps_disable(&gt->rps);

        intel_gt_check_clock_frequency(gt);

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                u32 cycles;
                u64 dt;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        st_engine_heartbeat_enable(engine);
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

                /* Set the evaluation interval to infinity! */
                intel_uncore_write_fw(gt->uncore,
                                      GEN6_RP_UP_EI, 0xffffffff);
                intel_uncore_write_fw(gt->uncore,
                                      GEN6_RP_UP_THRESHOLD, 0xffffffff);

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
                                      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

                if (wait_for(intel_uncore_read_fw(gt->uncore,
                                                  GEN6_RP_CUR_UP_EI),
                             10)) {
                        /* Just skip the test; assume lack of HW support */
                        pr_notice("%s: rps evaluation interval not ticking\n",
                                  engine->name);
                        err = -ENODEV;
                } else {
                        ktime_t dt_[5];
                        u32 cycles_[5];
                        int i;

                        for (i = 0; i < 5; i++) {
                                preempt_disable();

                                cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
                                dt_[i] = ktime_get();

                                udelay(1000);

                                cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
                                dt_[i] = ktime_sub(ktime_get(), dt_[i]);

                                preempt_enable();
                        }

                        /* Use the median of both cycle/dt; close enough */
                        sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
                        cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
                        sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
                        dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
                }

                intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
                intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                if (err == 0) {
                        u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
                        u32 expected =
                                intel_gt_ns_to_pm_interval(gt, dt);

                        pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
                                engine->name, cycles, time, dt, expected,
                                gt->clock_frequency / 1000);

                        if (10 * time < 8 * dt ||
                            8 * time > 10 * dt) {
                                pr_err("%s: rps clock time does not match walltime!\n",
                                       engine->name);
                                err = -EINVAL;
                        }

                        if (10 * expected < 8 * cycles ||
                            8 * expected > 10 * cycles) {
                                pr_err("%s: walltime does not match rps clock ticks!\n",
                                       engine->name);
                                err = -EINVAL;
                        }
                }

                if (igt_flush_test(gt->i915))
                        err = -EIO;

                break; /* once is enough */
        }

        intel_rps_enable(&gt->rps);
        intel_gt_pm_put(gt);

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (err == -ENODEV) /* skipped, don't report a fail */
                err = 0;

        return err;
}

int live_rps_control(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;
        /*
         * Check that the actual frequency matches our requested frequency,
         * to verify our control mechanism. We have to be careful that the
         * PCU may throttle the GPU in which case the actual frequency used
         * will be lower than requested.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        intel_gt_pm_get(gt);
        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                ktime_t min_dt, max_dt;
                int f, limit;
                int min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
                        pr_err("%s: could not set minimum frequency [%x], only %x!\n",
                               engine->name, rps->min_freq, read_cagf(rps));
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        show_pstate_limits(rps);
                        err = -EINVAL;
                        break;
                }

                for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
                        if (rps_set_check(rps, f) < f)
                                break;
                }

                limit = rps_set_check(rps, f);

                if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
                        pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
                               engine->name, rps->min_freq, read_cagf(rps));
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        show_pstate_limits(rps);
                        err = -EINVAL;
                        break;
                }

                max_dt = ktime_get();
                max = rps_set_check(rps, limit);
                max_dt = ktime_sub(ktime_get(), max_dt);

                min_dt = ktime_get();
                min = rps_set_check(rps, rps->min_freq);
                min_dt = ktime_sub(ktime_get(), min_dt);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
                        engine->name,
                        rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
                        rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
                        limit, intel_gpu_freq(rps, limit),
                        min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

                if (limit == rps->min_freq) {
                        pr_err("%s: GPU throttled to minimum!\n",
                               engine->name);
                        show_pstate_limits(rps);
                        err = -ENODEV;
                        break;
                }

                if (igt_flush_test(gt->i915)) {
                        err = -EIO;
                        break;
                }
        }
        intel_gt_pm_put(gt);

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

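/*
 * On LLC platforms, dump the PCODE min-frequency table: the effective
 * CPU and ring frequencies paired with each GPU frequency.
 */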
static void show_pcu_config(struct intel_rps *rps)
{
        struct drm_i915_private *i915 = rps_to_i915(rps);
        unsigned int max_gpu_freq, min_gpu_freq;
        intel_wakeref_t wakeref;
        int gpu_freq;

        if (!HAS_LLC(i915))
                return;

        min_gpu_freq = rps->min_freq;
        max_gpu_freq = rps->max_freq;
        if (GRAPHICS_VER(i915) >= 9) {
                /* Convert GT frequency to 50 MHz units */
                min_gpu_freq /= GEN9_FREQ_SCALER;
                max_gpu_freq /= GEN9_FREQ_SCALER;
        }

        wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

        pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
        for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
                int ia_freq = gpu_freq;

                snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
                               &ia_freq, NULL);

                pr_info("%5d  %5d  %5d\n",
                        gpu_freq * 50,
                        ((ia_freq >> 0) & 0xff) * 100,
                        ((ia_freq >> 8) & 0xff) * 100);
        }

        intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

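/*
 * Sample the memory-backed loop counter over a sleep; the delta over
 * walltime gives the batch execution rate, reported in KHz.
 */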
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
        u64 dc, dt;

        dc = READ_ONCE(*cntr);
        dt = ktime_get();
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dc = READ_ONCE(*cntr) - dc;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_frequency(cntr, 2);
        *freq = (*freq + read_cagf(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
                                  int duration_ms)
{
        u64 dc, dt;

        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
        dt = ktime_get();
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
                                   struct intel_engine_cs *engine,
                                   int *freq)
{
        u64 x[5];
        int i;

        *freq = rps_set_check(rps, *freq);
        for (i = 0; i < 5; i++)
                x[i] = __measure_cs_frequency(engine, 2);
        *freq = (*freq + read_cagf(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

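/* Check that the ratio x/y lies strictly within f_n/f_d and f_d/f_n. */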
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
        return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
        void (*saved_work)(struct work_struct *wrk);
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        struct pm_qos_request qos;
        enum intel_engine_id id;
        int err = 0;

        /*
         * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
                return 0;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_add_request(&qos, CPU_LATENCY);

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct i915_vma *vma;
                u32 *cancel, *cntr;
                struct {
                        u64 count;
                        int freq;
                } min, max;

                st_engine_heartbeat_disable(engine);

                vma = create_spin_counter(engine,
                                          engine->kernel_context->vm, false,
                                          &cancel, &cntr);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        st_engine_heartbeat_enable(engine);
                        break;
                }

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_vma;
                }

                err = i915_vma_move_to_active(vma, rq, 0);
                if (!err)
                        err = rq->engine->emit_bb_start(rq,
                                                        i915_vma_offset(vma),
                                                        PAGE_SIZE, 0);
                i915_request_add(rq);
                if (err)
                        goto err_vma;

                if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
                             10)) {
                        pr_err("%s: timed loop did not start\n",
                               engine->name);
                        goto err_vma;
                }

                min.freq = rps->min_freq;
                min.count = measure_cs_frequency_at(rps, engine, &min.freq);

                max.freq = rps->max_freq;
                max.count = measure_cs_frequency_at(rps, engine, &max.freq);

                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
                        engine->name,
                        min.count, intel_gpu_freq(rps, min.freq),
                        max.count, intel_gpu_freq(rps, max.freq),
                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
                                                     max.freq * min.count));

                if (!scaled_within(max.freq * min.count,
                                   min.freq * max.count,
                                   2, 3)) {
                        int f;

                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
                               engine->name,
                               max.freq * min.count,
                               min.freq * max.count);
                        show_pcu_config(rps);

                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
                                int act = f;
                                u64 count;

                                count = measure_cs_frequency_at(rps, engine, &act);
                                if (act < f)
                                        break;

                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
                                        engine->name,
                                        act, intel_gpu_freq(rps, act), count,
                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
                                                                     act * min.count));

                                f = act; /* may skip ahead [pcu granularity] */
                        }

                        err = -EINTR; /* ignore error, continue on with test */
                }

err_vma:
                *cancel = MI_BATCH_BUFFER_END;
                i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_unlock(vma);
                i915_vma_put(vma);

                st_engine_heartbeat_enable(engine);
                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_remove_request(&qos);

        return err;
}

int live_rps_frequency_srm(void *arg)
{
        void (*saved_work)(struct work_struct *wrk);
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        struct pm_qos_request qos;
        enum intel_engine_id id;
        int err = 0;

        /*
         * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */

        if (!intel_rps_is_enabled(rps))
                return 0;

        if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
                return 0;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_add_request(&qos, CPU_LATENCY);

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct i915_vma *vma;
                u32 *cancel, *cntr;
                struct {
                        u64 count;
                        int freq;
                } min, max;

                st_engine_heartbeat_disable(engine);

                vma = create_spin_counter(engine,
                                          engine->kernel_context->vm, true,
                                          &cancel, &cntr);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        st_engine_heartbeat_enable(engine);
                        break;
                }

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_vma;
                }

                err = i915_vma_move_to_active(vma, rq, 0);
                if (!err)
                        err = rq->engine->emit_bb_start(rq,
                                                        i915_vma_offset(vma),
                                                        PAGE_SIZE, 0);
                i915_request_add(rq);
                if (err)
                        goto err_vma;

                if (wait_for(READ_ONCE(*cntr), 10)) {
                        pr_err("%s: timed loop did not start\n",
                               engine->name);
                        goto err_vma;
                }

                min.freq = rps->min_freq;
                min.count = measure_frequency_at(rps, cntr, &min.freq);

                max.freq = rps->max_freq;
                max.count = measure_frequency_at(rps, cntr, &max.freq);

                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
                        engine->name,
                        min.count, intel_gpu_freq(rps, min.freq),
                        max.count, intel_gpu_freq(rps, max.freq),
                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
                                                     max.freq * min.count));

                if (!scaled_within(max.freq * min.count,
                                   min.freq * max.count,
                                   1, 2)) {
                        int f;

                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
                               engine->name,
                               max.freq * min.count,
                               min.freq * max.count);
                        show_pcu_config(rps);

                        for (f = min.freq + 1; f <= rps->max_freq; f++) {
                                int act = f;
                                u64 count;

                                count = measure_frequency_at(rps, cntr, &act);
                                if (act < f)
                                        break;

                                pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
                                        engine->name,
                                        act, intel_gpu_freq(rps, act), count,
                                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
                                                                     act * min.count));

                                f = act; /* may skip ahead [pcu granularity] */
                        }

                        err = -EINTR; /* ignore error, continue on with test */
                }

err_vma:
                *cancel = MI_BATCH_BUFFER_END;
                i915_gem_object_flush_map(vma->obj);
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_unlock(vma);
                i915_vma_put(vma);

                st_engine_heartbeat_enable(engine);
                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        if (CPU_LATENCY >= 0)
                cpu_latency_qos_remove_request(&qos);

        return err;
}

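/*
 * Flush any stale evaluation interval, clear the pending interrupt state,
 * then sleep for at least one full EI so a fresh interrupt can fire.
 */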
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
        /* Flush any previous EI */
        usleep_range(timeout_us, 2 * timeout_us);

        /* Reset the interrupt status */
        rps_disable_interrupts(rps);
        GEM_BUG_ON(rps->pm_iir);
        rps_enable_interrupts(rps);

        /* And then wait for the timeout, for real this time */
        usleep_range(2 * timeout_us, 3 * timeout_us);
}

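/*
 * With a spinner saturating the engine at the minimum frequency, sleep
 * for an evaluation interval and check that an UP threshold interrupt
 * is raised (the dummy worker ensures it is not acted upon).
 */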
static int __rps_up_interrupt(struct intel_rps *rps,
                              struct intel_engine_cs *engine,
                              struct igt_spinner *spin)
{
        struct intel_uncore *uncore = engine->uncore;
        struct i915_request *rq;
        u32 timeout;

        if (!intel_engine_can_store_dword(engine))
                return 0;

        rps_set_check(rps, rps->min_freq);

        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        i915_request_get(rq);
        i915_request_add(rq);

        if (!igt_wait_for_spinner(spin, rq)) {
                pr_err("%s: RPS spinner did not start\n",
                       engine->name);
                i915_request_put(rq);
                intel_gt_set_wedged(engine->gt);
                return -EIO;
        }

        if (!intel_rps_is_active(rps)) {
                pr_err("%s: RPS not enabled on starting spinner\n",
                       engine->name);
                igt_spinner_end(spin);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: RPS did not register UP interrupt\n",
                       engine->name);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (rps->last_freq != rps->min_freq) {
                pr_err("%s: RPS did not program min frequency\n",
                       engine->name);
                i915_request_put(rq);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
        timeout = DIV_ROUND_UP(timeout, 1000);

        sleep_for_ei(rps, timeout);
        GEM_BUG_ON(i915_request_completed(rq));

        igt_spinner_end(spin);
        i915_request_put(rq);

        if (rps->cur_freq != rps->min_freq) {
                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
                       engine->name, intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

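/*
 * Conversely, at the maximum frequency with the engine awake but idle,
 * sleep for an evaluation interval and expect a DOWN threshold (or
 * timeout) interrupt to be recorded.
 */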
static int __rps_down_interrupt(struct intel_rps *rps,
                                struct intel_engine_cs *engine)
{
        struct intel_uncore *uncore = engine->uncore;
        u32 timeout;

        rps_set_check(rps, rps->max_freq);

        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
                pr_err("%s: RPS did not register DOWN interrupt\n",
                       engine->name);
                return -EINVAL;
        }

        if (rps->last_freq != rps->max_freq) {
                pr_err("%s: RPS did not program max frequency\n",
                       engine->name);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
        timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
        timeout = DIV_ROUND_UP(timeout, 1000);

        sleep_for_ei(rps, timeout);

        if (rps->cur_freq != rps->max_freq) {
                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
                       engine->name,
                       intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

int live_rps_interrupt(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        u32 pm_events;
        int err = 0;

        /*
         * First, let's check whether or not we are receiving interrupts.
         */

        if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
                return 0;

        intel_gt_pm_get(gt);
        pm_events = rps->pm_events;
        intel_gt_pm_put(gt);
        if (!pm_events) {
                pr_err("No RPS PM events registered, but RPS is enabled?\n");
                return -ENODEV;
        }

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                /* Keep the engine busy with a spinner; expect an UP! */
                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
                        intel_gt_pm_wait_for_idle(engine->gt);
                        GEM_BUG_ON(intel_rps_is_active(rps));

                        st_engine_heartbeat_disable(engine);

                        err = __rps_up_interrupt(rps, engine, &spin);

                        st_engine_heartbeat_enable(engine);
                        if (err)
                                goto out;

                        intel_gt_pm_wait_for_idle(engine->gt);
                }

                /* Keep the engine awake but idle and check for DOWN */
                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
                        st_engine_heartbeat_disable(engine);
                        intel_rc6_disable(&gt->rc6);

                        err = __rps_down_interrupt(rps, engine);

                        intel_rc6_enable(&gt->rc6);
                        st_engine_heartbeat_enable(engine);
                        if (err)
                                goto out;
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

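/*
 * Sample package energy via RAPL over a sleep; the energy delta (uJ)
 * over the walltime (ns) scales to milliwatts.
 */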
static u64 __measure_power(int duration_ms)
{
        u64 dE, dt;

        dE = librapl_energy_uJ();
        dt = ktime_get();
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dE = librapl_energy_uJ() - dE;
        dt = ktime_get() - dt;

        return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
        u64 x[5];
        int i;

        for (i = 0; i < 5; i++)
                x[i] = __measure_power(5);

        *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
        *freq = rps_set_check(rps, *freq);
        return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * Our fundamental assumption is that running at lower frequency
         * actually saves power. Let's see if our RAPL measurements support
         * that theory.
         */

        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
                return 0;

        if (!librapl_supported(gt->i915))
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct {
                        u64 power;
                        int freq;
                } min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                st_engine_heartbeat_disable(engine);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        st_engine_heartbeat_enable(engine);
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        igt_spinner_end(&spin);
                        st_engine_heartbeat_enable(engine);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                max.freq = rps->max_freq;
                max.power = measure_power_at(rps, &max.freq);

                min.freq = rps->min_freq;
                min.power = measure_power_at(rps, &min.freq);

                igt_spinner_end(&spin);
                st_engine_heartbeat_enable(engine);

                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
                        engine->name,
                        min.power, intel_gpu_freq(rps, min.freq),
                        max.power, intel_gpu_freq(rps, max.freq));

                if (10 * min.freq >= 9 * max.freq) {
                        pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
                                  min.freq, intel_gpu_freq(rps, min.freq),
                                  max.freq, intel_gpu_freq(rps, max.freq));
                        continue;
                }

                if (11 * min.power > 10 * max.power) {
                        pr_err("%s: did not conserve power when setting lower frequency!\n",
                               engine->name);
                        err = -EINVAL;
                        break;
                }

                if (igt_flush_test(gt->i915)) {
                        err = -EIO;
                        break;
                }
        }

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

int live_rps_dynamic(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * We've looked at the basics, and have established that we
         * can change the clock frequency and that the HW will generate
         * interrupts based on load. Now we check how we integrate those
         * moving parts into dynamic reclocking based on load.
         */

        if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        if (intel_rps_has_interrupts(rps))
                pr_info("RPS has interrupt support\n");
        if (intel_rps_uses_timer(rps))
                pr_info("RPS has timer support\n");

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct {
                        ktime_t dt;
                        u8 freq;
                } min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_gt_pm_wait_for_idle(gt);
                GEM_BUG_ON(intel_rps_is_active(rps));
                rps->cur_freq = rps->min_freq;

                intel_engine_pm_get(engine);
                intel_rc6_disable(&gt->rc6);
                GEM_BUG_ON(rps->last_freq != rps->min_freq);

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err;
                }

                i915_request_add(rq);

                max.dt = ktime_get();
                max.freq = wait_for_freq(rps, rps->max_freq, 500);
                max.dt = ktime_sub(ktime_get(), max.dt);

                igt_spinner_end(&spin);

                min.dt = ktime_get();
                min.freq = wait_for_freq(rps, rps->min_freq, 2000);
                min.dt = ktime_sub(ktime_get(), min.dt);

                pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
                        engine->name,
                        max.freq, intel_gpu_freq(rps, max.freq),
                        ktime_to_ns(max.dt),
                        min.freq, intel_gpu_freq(rps, min.freq),
                        ktime_to_ns(min.dt));
                if (min.freq >= max.freq) {
                        pr_err("%s: dynamic reclocking of spinner failed!\n",
                               engine->name);
                        err = -EINVAL;
                }

err:
                intel_rc6_enable(&gt->rc6);
                intel_engine_pm_put(engine);

                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        igt_spinner_fini(&spin);

        return err;
}