]> Git Repo - linux.git/blob - drivers/gpu/drm/i915/gt/selftest_rps.c
Linux 6.14-rc3
[linux.git] / drivers / gpu / drm / i915 / gt / selftest_rps.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "gem/i915_gem_internal.h"
10
11 #include "i915_reg.h"
12 #include "intel_engine_heartbeat.h"
13 #include "intel_engine_pm.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt_clock_utils.h"
17 #include "intel_gt_pm.h"
18 #include "intel_rc6.h"
19 #include "selftest_engine_heartbeat.h"
20 #include "selftest_rps.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_spinner.h"
23 #include "selftests/librapl.h"
24
25 /* Try to isolate the impact of cstates from determining frequency response */
26 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27
/*
 * No-op work handler. The live selftests in this file install it in
 * rps->work.func (saving and later restoring the original handler) so
 * that the regular handler does not run while they take measurements.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* Intentionally empty. */
}
31
32 static int cmp_u64(const void *A, const void *B)
33 {
34         const u64 *a = A, *b = B;
35
36         if (*a < *b)
37                 return -1;
38         else if (*a > *b)
39                 return 1;
40         else
41                 return 0;
42 }
43
44 static int cmp_u32(const void *A, const void *B)
45 {
46         const u32 *a = A, *b = B;
47
48         if (*a < *b)
49                 return -1;
50         else if (*a > *b)
51                 return 1;
52         else
53                 return 0;
54 }
55
56 static struct i915_vma *
57 create_spin_counter(struct intel_engine_cs *engine,
58                     struct i915_address_space *vm,
59                     bool srm,
60                     u32 **cancel,
61                     u32 **counter)
62 {
63         enum {
64                 COUNT,
65                 INC,
66                 __NGPR__,
67         };
68 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
69         struct drm_i915_gem_object *obj;
70         struct i915_vma *vma;
71         unsigned long end;
72         u32 *base, *cs;
73         int loop, i;
74         int err;
75
76         obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
77         if (IS_ERR(obj))
78                 return ERR_CAST(obj);
79
80         end = obj->base.size / sizeof(u32) - 1;
81
82         vma = i915_vma_instance(obj, vm, NULL);
83         if (IS_ERR(vma)) {
84                 err = PTR_ERR(vma);
85                 goto err_put;
86         }
87
88         err = i915_vma_pin(vma, 0, 0, PIN_USER);
89         if (err)
90                 goto err_unlock;
91
92         i915_vma_lock(vma);
93
94         base = i915_gem_object_pin_map(obj, I915_MAP_WC);
95         if (IS_ERR(base)) {
96                 err = PTR_ERR(base);
97                 goto err_unpin;
98         }
99         cs = base;
100
101         *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
102         for (i = 0; i < __NGPR__; i++) {
103                 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
104                 *cs++ = 0;
105                 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
106                 *cs++ = 0;
107         }
108
109         *cs++ = MI_LOAD_REGISTER_IMM(1);
110         *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
111         *cs++ = 1;
112
113         loop = cs - base;
114
115         /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
116         for (i = 0; i < 1024; i++) {
117                 *cs++ = MI_MATH(4);
118                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
119                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120                 *cs++ = MI_MATH_ADD;
121                 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
122
123                 if (srm) {
124                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
125                         *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
126                         *cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
127                         *cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
128                 }
129         }
130
131         *cs++ = MI_BATCH_BUFFER_START_GEN8;
132         *cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
133         *cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
134         GEM_BUG_ON(cs - base > end);
135
136         i915_gem_object_flush_map(obj);
137
138         *cancel = base + loop;
139         *counter = srm ? memset32(base + end, 0, 1) : NULL;
140         return vma;
141
142 err_unpin:
143         i915_vma_unpin(vma);
144 err_unlock:
145         i915_vma_unlock(vma);
146 err_put:
147         i915_gem_object_put(obj);
148         return ERR_PTR(err);
149 }
150
151 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
152 {
153         u8 history[64], i;
154         unsigned long end;
155         int sleep;
156
157         i = 0;
158         memset(history, freq, sizeof(history));
159         sleep = 20;
160
161         /* The PCU does not change instantly, but drifts towards the goal? */
162         end = jiffies + msecs_to_jiffies(timeout_ms);
163         do {
164                 u8 act;
165
166                 act = read_cagf(rps);
167                 if (time_after(jiffies, end))
168                         return act;
169
170                 /* Target acquired */
171                 if (act == freq)
172                         return act;
173
174                 /* Any change within the last N samples? */
175                 if (!memchr_inv(history, act, sizeof(history)))
176                         return act;
177
178                 history[i] = act;
179                 i = (i + 1) % ARRAY_SIZE(history);
180
181                 usleep_range(sleep, 2 * sleep);
182                 sleep *= 2;
183                 if (sleep > timeout_ms * 20)
184                         sleep = timeout_ms * 20;
185         } while (1);
186 }
187
188 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
189 {
190         mutex_lock(&rps->lock);
191         GEM_BUG_ON(!intel_rps_is_active(rps));
192         if (wait_for(!intel_rps_set(rps, freq), 50)) {
193                 mutex_unlock(&rps->lock);
194                 return 0;
195         }
196         GEM_BUG_ON(rps->last_freq != freq);
197         mutex_unlock(&rps->lock);
198
199         return wait_for_freq(rps, freq, 50);
200 }
201
202 static void show_pstate_limits(struct intel_rps *rps)
203 {
204         struct drm_i915_private *i915 = rps_to_i915(rps);
205
206         if (IS_BROXTON(i915)) {
207                 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
208                         i915_mmio_reg_offset(BXT_RP_STATE_CAP),
209                         intel_uncore_read(rps_to_uncore(rps),
210                                           BXT_RP_STATE_CAP));
211         } else if (GRAPHICS_VER(i915) == 9) {
212                 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
213                         i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
214                         intel_uncore_read(rps_to_uncore(rps),
215                                           GEN9_RP_STATE_LIMITS));
216         }
217 }
218
219 int live_rps_clock_interval(void *arg)
220 {
221         struct intel_gt *gt = arg;
222         struct intel_rps *rps = &gt->rps;
223         void (*saved_work)(struct work_struct *wrk);
224         struct intel_engine_cs *engine;
225         enum intel_engine_id id;
226         struct igt_spinner spin;
227         intel_wakeref_t wakeref;
228         int err = 0;
229
230         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
231                 return 0;
232
233         if (igt_spinner_init(&spin, gt))
234                 return -ENOMEM;
235
236         intel_gt_pm_wait_for_idle(gt);
237         saved_work = rps->work.func;
238         rps->work.func = dummy_rps_work;
239
240         wakeref = intel_gt_pm_get(gt);
241         intel_rps_disable(&gt->rps);
242
243         intel_gt_check_clock_frequency(gt);
244
245         for_each_engine(engine, gt, id) {
246                 struct i915_request *rq;
247                 u32 cycles;
248                 u64 dt;
249
250                 if (!intel_engine_can_store_dword(engine))
251                         continue;
252
253                 st_engine_heartbeat_disable(engine);
254
255                 rq = igt_spinner_create_request(&spin,
256                                                 engine->kernel_context,
257                                                 MI_NOOP);
258                 if (IS_ERR(rq)) {
259                         st_engine_heartbeat_enable(engine);
260                         err = PTR_ERR(rq);
261                         break;
262                 }
263
264                 i915_request_add(rq);
265
266                 if (!igt_wait_for_spinner(&spin, rq)) {
267                         pr_err("%s: RPS spinner did not start\n",
268                                engine->name);
269                         igt_spinner_end(&spin);
270                         st_engine_heartbeat_enable(engine);
271                         intel_gt_set_wedged(engine->gt);
272                         err = -EIO;
273                         break;
274                 }
275
276                 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
277
278                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
279
280                 /* Set the evaluation interval to infinity! */
281                 intel_uncore_write_fw(gt->uncore,
282                                       GEN6_RP_UP_EI, 0xffffffff);
283                 intel_uncore_write_fw(gt->uncore,
284                                       GEN6_RP_UP_THRESHOLD, 0xffffffff);
285
286                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
287                                       GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
288
289                 if (wait_for(intel_uncore_read_fw(gt->uncore,
290                                                   GEN6_RP_CUR_UP_EI),
291                              10)) {
292                         /* Just skip the test; assume lack of HW support */
293                         pr_notice("%s: rps evaluation interval not ticking\n",
294                                   engine->name);
295                         err = -ENODEV;
296                 } else {
297                         ktime_t dt_[5];
298                         u32 cycles_[5];
299                         int i;
300
301                         for (i = 0; i < 5; i++) {
302                                 preempt_disable();
303
304                                 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
305                                 dt_[i] = ktime_get();
306
307                                 udelay(1000);
308
309                                 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
310                                 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
311
312                                 preempt_enable();
313                         }
314
315                         /* Use the median of both cycle/dt; close enough */
316                         sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
317                         cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
318                         sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
319                         dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
320                 }
321
322                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
323                 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
324
325                 igt_spinner_end(&spin);
326                 st_engine_heartbeat_enable(engine);
327
328                 if (err == 0) {
329                         u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
330                         u32 expected =
331                                 intel_gt_ns_to_pm_interval(gt, dt);
332
333                         pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
334                                 engine->name, cycles, time, dt, expected,
335                                 gt->clock_frequency / 1000);
336
337                         if (10 * time < 8 * dt ||
338                             8 * time > 10 * dt) {
339                                 pr_err("%s: rps clock time does not match walltime!\n",
340                                        engine->name);
341                                 err = -EINVAL;
342                         }
343
344                         if (10 * expected < 8 * cycles ||
345                             8 * expected > 10 * cycles) {
346                                 pr_err("%s: walltime does not match rps clock ticks!\n",
347                                        engine->name);
348                                 err = -EINVAL;
349                         }
350                 }
351
352                 if (igt_flush_test(gt->i915))
353                         err = -EIO;
354
355                 break; /* once is enough */
356         }
357
358         intel_rps_enable(&gt->rps);
359         intel_gt_pm_put(gt, wakeref);
360
361         igt_spinner_fini(&spin);
362
363         intel_gt_pm_wait_for_idle(gt);
364         rps->work.func = saved_work;
365
366         if (err == -ENODEV) /* skipped, don't report a fail */
367                 err = 0;
368
369         return err;
370 }
371
372 int live_rps_control(void *arg)
373 {
374         struct intel_gt *gt = arg;
375         struct intel_rps *rps = &gt->rps;
376         void (*saved_work)(struct work_struct *wrk);
377         struct intel_engine_cs *engine;
378         enum intel_engine_id id;
379         struct igt_spinner spin;
380         intel_wakeref_t wakeref;
381         int err = 0;
382
383         /*
384          * Check that the actual frequency matches our requested frequency,
385          * to verify our control mechanism. We have to be careful that the
386          * PCU may throttle the GPU in which case the actual frequency used
387          * will be lowered than requested.
388          */
389
390         if (!intel_rps_is_enabled(rps))
391                 return 0;
392
393         if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
394                 return 0;
395
396         if (igt_spinner_init(&spin, gt))
397                 return -ENOMEM;
398
399         intel_gt_pm_wait_for_idle(gt);
400         saved_work = rps->work.func;
401         rps->work.func = dummy_rps_work;
402
403         wakeref = intel_gt_pm_get(gt);
404         for_each_engine(engine, gt, id) {
405                 struct i915_request *rq;
406                 ktime_t min_dt, max_dt;
407                 int f, limit;
408                 int min, max;
409
410                 if (!intel_engine_can_store_dword(engine))
411                         continue;
412
413                 st_engine_heartbeat_disable(engine);
414
415                 rq = igt_spinner_create_request(&spin,
416                                                 engine->kernel_context,
417                                                 MI_NOOP);
418                 if (IS_ERR(rq)) {
419                         err = PTR_ERR(rq);
420                         break;
421                 }
422
423                 i915_request_add(rq);
424
425                 if (!igt_wait_for_spinner(&spin, rq)) {
426                         pr_err("%s: RPS spinner did not start\n",
427                                engine->name);
428                         igt_spinner_end(&spin);
429                         st_engine_heartbeat_enable(engine);
430                         intel_gt_set_wedged(engine->gt);
431                         err = -EIO;
432                         break;
433                 }
434
435                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
436                         pr_err("%s: could not set minimum frequency [%x], only %x!\n",
437                                engine->name, rps->min_freq, read_cagf(rps));
438                         igt_spinner_end(&spin);
439                         st_engine_heartbeat_enable(engine);
440                         show_pstate_limits(rps);
441                         err = -EINVAL;
442                         break;
443                 }
444
445                 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
446                         if (rps_set_check(rps, f) < f)
447                                 break;
448                 }
449
450                 limit = rps_set_check(rps, f);
451
452                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
453                         pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
454                                engine->name, rps->min_freq, read_cagf(rps));
455                         igt_spinner_end(&spin);
456                         st_engine_heartbeat_enable(engine);
457                         show_pstate_limits(rps);
458                         err = -EINVAL;
459                         break;
460                 }
461
462                 max_dt = ktime_get();
463                 max = rps_set_check(rps, limit);
464                 max_dt = ktime_sub(ktime_get(), max_dt);
465
466                 min_dt = ktime_get();
467                 min = rps_set_check(rps, rps->min_freq);
468                 min_dt = ktime_sub(ktime_get(), min_dt);
469
470                 igt_spinner_end(&spin);
471                 st_engine_heartbeat_enable(engine);
472
473                 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
474                         engine->name,
475                         rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
476                         rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
477                         limit, intel_gpu_freq(rps, limit),
478                         min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
479
480                 if (limit == rps->min_freq) {
481                         pr_err("%s: GPU throttled to minimum!\n",
482                                engine->name);
483                         show_pstate_limits(rps);
484                         err = -ENODEV;
485                         break;
486                 }
487
488                 if (igt_flush_test(gt->i915)) {
489                         err = -EIO;
490                         break;
491                 }
492         }
493         intel_gt_pm_put(gt, wakeref);
494
495         igt_spinner_fini(&spin);
496
497         intel_gt_pm_wait_for_idle(gt);
498         rps->work.func = saved_work;
499
500         return err;
501 }
502
503 static void show_pcu_config(struct intel_rps *rps)
504 {
505         struct drm_i915_private *i915 = rps_to_i915(rps);
506         unsigned int max_gpu_freq, min_gpu_freq;
507         intel_wakeref_t wakeref;
508         int gpu_freq;
509
510         if (!HAS_LLC(i915))
511                 return;
512
513         min_gpu_freq = rps->min_freq;
514         max_gpu_freq = rps->max_freq;
515         if (GRAPHICS_VER(i915) >= 9) {
516                 /* Convert GT frequency to 50 HZ units */
517                 min_gpu_freq /= GEN9_FREQ_SCALER;
518                 max_gpu_freq /= GEN9_FREQ_SCALER;
519         }
520
521         wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
522
523         pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
524         for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
525                 int ia_freq = gpu_freq;
526
527                 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
528                                &ia_freq, NULL);
529
530                 pr_info("%5d  %5d  %5d\n",
531                         gpu_freq * 50,
532                         ((ia_freq >> 0) & 0xff) * 100,
533                         ((ia_freq >> 8) & 0xff) * 100);
534         }
535
536         intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
537 }
538
539 static u64 __measure_frequency(u32 *cntr, int duration_ms)
540 {
541         u64 dc, dt;
542
543         dc = READ_ONCE(*cntr);
544         dt = ktime_get();
545         usleep_range(1000 * duration_ms, 2000 * duration_ms);
546         dc = READ_ONCE(*cntr) - dc;
547         dt = ktime_get() - dt;
548
549         return div64_u64(1000 * 1000 * dc, dt);
550 }
551
552 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
553 {
554         u64 x[5];
555         int i;
556
557         *freq = rps_set_check(rps, *freq);
558         for (i = 0; i < 5; i++)
559                 x[i] = __measure_frequency(cntr, 2);
560         *freq = (*freq + read_cagf(rps)) / 2;
561
562         /* A simple triangle filter for better result stability */
563         sort(x, 5, sizeof(*x), cmp_u64, NULL);
564         return div_u64(x[1] + 2 * x[2] + x[3], 4);
565 }
566
567 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
568                                   int duration_ms)
569 {
570         u64 dc, dt;
571
572         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
573         dt = ktime_get();
574         usleep_range(1000 * duration_ms, 2000 * duration_ms);
575         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
576         dt = ktime_get() - dt;
577
578         return div64_u64(1000 * 1000 * dc, dt);
579 }
580
581 static u64 measure_cs_frequency_at(struct intel_rps *rps,
582                                    struct intel_engine_cs *engine,
583                                    int *freq)
584 {
585         u64 x[5];
586         int i;
587
588         *freq = rps_set_check(rps, *freq);
589         for (i = 0; i < 5; i++)
590                 x[i] = __measure_cs_frequency(engine, 2);
591         *freq = (*freq + read_cagf(rps)) / 2;
592
593         /* A simple triangle filter for better result stability */
594         sort(x, 5, sizeof(*x), cmp_u64, NULL);
595         return div_u64(x[1] + 2 * x[2] + x[3], 4);
596 }
597
598 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
599 {
600         return f_d * x > f_n * y && f_n * x < f_d * y;
601 }
602
603 int live_rps_frequency_cs(void *arg)
604 {
605         void (*saved_work)(struct work_struct *wrk);
606         struct intel_gt *gt = arg;
607         struct intel_rps *rps = &gt->rps;
608         struct intel_engine_cs *engine;
609         struct pm_qos_request qos;
610         enum intel_engine_id id;
611         int err = 0;
612
613         /*
614          * The premise is that the GPU does change frequency at our behest.
615          * Let's check there is a correspondence between the requested
616          * frequency, the actual frequency, and the observed clock rate.
617          */
618
619         if (!intel_rps_is_enabled(rps))
620                 return 0;
621
622         if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
623                 return 0;
624
625         if (CPU_LATENCY >= 0)
626                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
627
628         intel_gt_pm_wait_for_idle(gt);
629         saved_work = rps->work.func;
630         rps->work.func = dummy_rps_work;
631
632         for_each_engine(engine, gt, id) {
633                 struct i915_request *rq;
634                 struct i915_vma *vma;
635                 u32 *cancel, *cntr;
636                 struct {
637                         u64 count;
638                         int freq;
639                 } min, max;
640
641                 st_engine_heartbeat_disable(engine);
642
643                 vma = create_spin_counter(engine,
644                                           engine->kernel_context->vm, false,
645                                           &cancel, &cntr);
646                 if (IS_ERR(vma)) {
647                         err = PTR_ERR(vma);
648                         st_engine_heartbeat_enable(engine);
649                         break;
650                 }
651
652                 rq = intel_engine_create_kernel_request(engine);
653                 if (IS_ERR(rq)) {
654                         err = PTR_ERR(rq);
655                         goto err_vma;
656                 }
657
658                 err = i915_vma_move_to_active(vma, rq, 0);
659                 if (!err)
660                         err = rq->engine->emit_bb_start(rq,
661                                                         i915_vma_offset(vma),
662                                                         PAGE_SIZE, 0);
663                 i915_request_add(rq);
664                 if (err)
665                         goto err_vma;
666
667                 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
668                              10)) {
669                         pr_err("%s: timed loop did not start\n",
670                                engine->name);
671                         goto err_vma;
672                 }
673
674                 min.freq = rps->min_freq;
675                 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
676
677                 max.freq = rps->max_freq;
678                 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
679
680                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
681                         engine->name,
682                         min.count, intel_gpu_freq(rps, min.freq),
683                         max.count, intel_gpu_freq(rps, max.freq),
684                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
685                                                      max.freq * min.count));
686
687                 if (!scaled_within(max.freq * min.count,
688                                    min.freq * max.count,
689                                    2, 3)) {
690                         int f;
691
692                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
693                                engine->name,
694                                max.freq * min.count,
695                                min.freq * max.count);
696                         show_pcu_config(rps);
697
698                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
699                                 int act = f;
700                                 u64 count;
701
702                                 count = measure_cs_frequency_at(rps, engine, &act);
703                                 if (act < f)
704                                         break;
705
706                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
707                                         engine->name,
708                                         act, intel_gpu_freq(rps, act), count,
709                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
710                                                                      act * min.count));
711
712                                 f = act; /* may skip ahead [pcu granularity] */
713                         }
714
715                         err = -EINTR; /* ignore error, continue on with test */
716                 }
717
718 err_vma:
719                 *cancel = MI_BATCH_BUFFER_END;
720                 i915_gem_object_flush_map(vma->obj);
721                 i915_gem_object_unpin_map(vma->obj);
722                 i915_vma_unpin(vma);
723                 i915_vma_unlock(vma);
724                 i915_vma_put(vma);
725
726                 st_engine_heartbeat_enable(engine);
727                 if (igt_flush_test(gt->i915))
728                         err = -EIO;
729                 if (err)
730                         break;
731         }
732
733         intel_gt_pm_wait_for_idle(gt);
734         rps->work.func = saved_work;
735
736         if (CPU_LATENCY >= 0)
737                 cpu_latency_qos_remove_request(&qos);
738
739         return err;
740 }
741
742 int live_rps_frequency_srm(void *arg)
743 {
744         void (*saved_work)(struct work_struct *wrk);
745         struct intel_gt *gt = arg;
746         struct intel_rps *rps = &gt->rps;
747         struct intel_engine_cs *engine;
748         struct pm_qos_request qos;
749         enum intel_engine_id id;
750         int err = 0;
751
752         /*
753          * The premise is that the GPU does change frequency at our behest.
754          * Let's check there is a correspondence between the requested
755          * frequency, the actual frequency, and the observed clock rate.
756          */
757
758         if (!intel_rps_is_enabled(rps))
759                 return 0;
760
761         if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
762                 return 0;
763
764         if (CPU_LATENCY >= 0)
765                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
766
767         intel_gt_pm_wait_for_idle(gt);
768         saved_work = rps->work.func;
769         rps->work.func = dummy_rps_work;
770
771         for_each_engine(engine, gt, id) {
772                 struct i915_request *rq;
773                 struct i915_vma *vma;
774                 u32 *cancel, *cntr;
775                 struct {
776                         u64 count;
777                         int freq;
778                 } min, max;
779
780                 st_engine_heartbeat_disable(engine);
781
782                 vma = create_spin_counter(engine,
783                                           engine->kernel_context->vm, true,
784                                           &cancel, &cntr);
785                 if (IS_ERR(vma)) {
786                         err = PTR_ERR(vma);
787                         st_engine_heartbeat_enable(engine);
788                         break;
789                 }
790
791                 rq = intel_engine_create_kernel_request(engine);
792                 if (IS_ERR(rq)) {
793                         err = PTR_ERR(rq);
794                         goto err_vma;
795                 }
796
797                 err = i915_vma_move_to_active(vma, rq, 0);
798                 if (!err)
799                         err = rq->engine->emit_bb_start(rq,
800                                                         i915_vma_offset(vma),
801                                                         PAGE_SIZE, 0);
802                 i915_request_add(rq);
803                 if (err)
804                         goto err_vma;
805
806                 if (wait_for(READ_ONCE(*cntr), 10)) {
807                         pr_err("%s: timed loop did not start\n",
808                                engine->name);
809                         goto err_vma;
810                 }
811
812                 min.freq = rps->min_freq;
813                 min.count = measure_frequency_at(rps, cntr, &min.freq);
814
815                 max.freq = rps->max_freq;
816                 max.count = measure_frequency_at(rps, cntr, &max.freq);
817
818                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
819                         engine->name,
820                         min.count, intel_gpu_freq(rps, min.freq),
821                         max.count, intel_gpu_freq(rps, max.freq),
822                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
823                                                      max.freq * min.count));
824
825                 if (!scaled_within(max.freq * min.count,
826                                    min.freq * max.count,
827                                    1, 2)) {
828                         int f;
829
830                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
831                                engine->name,
832                                max.freq * min.count,
833                                min.freq * max.count);
834                         show_pcu_config(rps);
835
836                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
837                                 int act = f;
838                                 u64 count;
839
840                                 count = measure_frequency_at(rps, cntr, &act);
841                                 if (act < f)
842                                         break;
843
844                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
845                                         engine->name,
846                                         act, intel_gpu_freq(rps, act), count,
847                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
848                                                                      act * min.count));
849
850                                 f = act; /* may skip ahead [pcu granularity] */
851                         }
852
853                         err = -EINTR; /* ignore error, continue on with test */
854                 }
855
856 err_vma:
857                 *cancel = MI_BATCH_BUFFER_END;
858                 i915_gem_object_flush_map(vma->obj);
859                 i915_gem_object_unpin_map(vma->obj);
860                 i915_vma_unpin(vma);
861                 i915_vma_unlock(vma);
862                 i915_vma_put(vma);
863
864                 st_engine_heartbeat_enable(engine);
865                 if (igt_flush_test(gt->i915))
866                         err = -EIO;
867                 if (err)
868                         break;
869         }
870
871         intel_gt_pm_wait_for_idle(gt);
872         rps->work.func = saved_work;
873
874         if (CPU_LATENCY >= 0)
875                 cpu_latency_qos_remove_request(&qos);
876
877         return err;
878 }
879
880 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
881 {
882         /* Flush any previous EI */
883         usleep_range(timeout_us, 2 * timeout_us);
884
885         /* Reset the interrupt status */
886         rps_disable_interrupts(rps);
887         GEM_BUG_ON(rps->pm_iir);
888         rps_enable_interrupts(rps);
889
890         /* And then wait for the timeout, for real this time */
891         usleep_range(2 * timeout_us, 3 * timeout_us);
892 }
893
894 static int __rps_up_interrupt(struct intel_rps *rps,
895                               struct intel_engine_cs *engine,
896                               struct igt_spinner *spin)
897 {
898         struct intel_uncore *uncore = engine->uncore;
899         struct i915_request *rq;
900         u32 timeout;
901
902         if (!intel_engine_can_store_dword(engine))
903                 return 0;
904
905         rps_set_check(rps, rps->min_freq);
906
907         rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
908         if (IS_ERR(rq))
909                 return PTR_ERR(rq);
910
911         i915_request_get(rq);
912         i915_request_add(rq);
913
914         if (!igt_wait_for_spinner(spin, rq)) {
915                 pr_err("%s: RPS spinner did not start\n",
916                        engine->name);
917                 i915_request_put(rq);
918                 intel_gt_set_wedged(engine->gt);
919                 return -EIO;
920         }
921
922         if (!intel_rps_is_active(rps)) {
923                 pr_err("%s: RPS not enabled on starting spinner\n",
924                        engine->name);
925                 igt_spinner_end(spin);
926                 i915_request_put(rq);
927                 return -EINVAL;
928         }
929
930         if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
931                 pr_err("%s: RPS did not register UP interrupt\n",
932                        engine->name);
933                 i915_request_put(rq);
934                 return -EINVAL;
935         }
936
937         if (rps->last_freq != rps->min_freq) {
938                 pr_err("%s: RPS did not program min frequency\n",
939                        engine->name);
940                 i915_request_put(rq);
941                 return -EINVAL;
942         }
943
944         timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
945         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
946         timeout = DIV_ROUND_UP(timeout, 1000);
947
948         sleep_for_ei(rps, timeout);
949         GEM_BUG_ON(i915_request_completed(rq));
950
951         igt_spinner_end(spin);
952         i915_request_put(rq);
953
954         if (rps->cur_freq != rps->min_freq) {
955                 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
956                        engine->name, intel_rps_read_actual_frequency(rps));
957                 return -EINVAL;
958         }
959
960         if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
961                 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
962                        engine->name, rps->pm_iir,
963                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
964                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
965                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
966                 return -EINVAL;
967         }
968
969         return 0;
970 }
971
972 static int __rps_down_interrupt(struct intel_rps *rps,
973                                 struct intel_engine_cs *engine)
974 {
975         struct intel_uncore *uncore = engine->uncore;
976         u32 timeout;
977
978         rps_set_check(rps, rps->max_freq);
979
980         if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
981                 pr_err("%s: RPS did not register DOWN interrupt\n",
982                        engine->name);
983                 return -EINVAL;
984         }
985
986         if (rps->last_freq != rps->max_freq) {
987                 pr_err("%s: RPS did not program max frequency\n",
988                        engine->name);
989                 return -EINVAL;
990         }
991
992         timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
993         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
994         timeout = DIV_ROUND_UP(timeout, 1000);
995
996         sleep_for_ei(rps, timeout);
997
998         if (rps->cur_freq != rps->max_freq) {
999                 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1000                        engine->name,
1001                        intel_rps_read_actual_frequency(rps));
1002                 return -EINVAL;
1003         }
1004
1005         if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1006                 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1007                        engine->name, rps->pm_iir,
1008                        intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1009                        intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1010                        intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1011                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1012                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1013                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
1014                 return -EINVAL;
1015         }
1016
1017         return 0;
1018 }
1019
1020 int live_rps_interrupt(void *arg)
1021 {
1022         struct intel_gt *gt = arg;
1023         struct intel_rps *rps = &gt->rps;
1024         void (*saved_work)(struct work_struct *wrk);
1025         struct intel_engine_cs *engine;
1026         enum intel_engine_id id;
1027         struct igt_spinner spin;
1028         intel_wakeref_t wakeref;
1029         u32 pm_events;
1030         int err = 0;
1031
1032         /*
1033          * First, let's check whether or not we are receiving interrupts.
1034          */
1035
1036         if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1037                 return 0;
1038
1039         pm_events = 0;
1040         with_intel_gt_pm(gt, wakeref)
1041                 pm_events = rps->pm_events;
1042         if (!pm_events) {
1043                 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1044                 return -ENODEV;
1045         }
1046
1047         if (igt_spinner_init(&spin, gt))
1048                 return -ENOMEM;
1049
1050         intel_gt_pm_wait_for_idle(gt);
1051         saved_work = rps->work.func;
1052         rps->work.func = dummy_rps_work;
1053
1054         for_each_engine(engine, gt, id) {
1055                 /* Keep the engine busy with a spinner; expect an UP! */
1056                 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1057                         intel_gt_pm_wait_for_idle(engine->gt);
1058                         GEM_BUG_ON(intel_rps_is_active(rps));
1059
1060                         st_engine_heartbeat_disable(engine);
1061
1062                         err = __rps_up_interrupt(rps, engine, &spin);
1063
1064                         st_engine_heartbeat_enable(engine);
1065                         if (err)
1066                                 goto out;
1067
1068                         intel_gt_pm_wait_for_idle(engine->gt);
1069                 }
1070
1071                 /* Keep the engine awake but idle and check for DOWN */
1072                 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1073                         st_engine_heartbeat_disable(engine);
1074                         intel_rc6_disable(&gt->rc6);
1075
1076                         err = __rps_down_interrupt(rps, engine);
1077
1078                         intel_rc6_enable(&gt->rc6);
1079                         st_engine_heartbeat_enable(engine);
1080                         if (err)
1081                                 goto out;
1082                 }
1083         }
1084
1085 out:
1086         if (igt_flush_test(gt->i915))
1087                 err = -EIO;
1088
1089         igt_spinner_fini(&spin);
1090
1091         intel_gt_pm_wait_for_idle(gt);
1092         rps->work.func = saved_work;
1093
1094         return err;
1095 }
1096
1097 static u64 __measure_power(int duration_ms)
1098 {
1099         u64 dE, dt;
1100
1101         dE = librapl_energy_uJ();
1102         dt = ktime_get();
1103         usleep_range(1000 * duration_ms, 2000 * duration_ms);
1104         dE = librapl_energy_uJ() - dE;
1105         dt = ktime_get() - dt;
1106
1107         return div64_u64(1000 * 1000 * dE, dt);
1108 }
1109
1110 static u64 measure_power(struct intel_rps *rps, int *freq)
1111 {
1112         u64 x[5];
1113         int i;
1114
1115         for (i = 0; i < 5; i++)
1116                 x[i] = __measure_power(5);
1117
1118         *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
1119
1120         /* A simple triangle filter for better result stability */
1121         sort(x, 5, sizeof(*x), cmp_u64, NULL);
1122         return div_u64(x[1] + 2 * x[2] + x[3], 4);
1123 }
1124
1125 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1126 {
1127         *freq = rps_set_check(rps, *freq);
1128         msleep(100);
1129         return measure_power(rps, freq);
1130 }
1131
1132 int live_rps_power(void *arg)
1133 {
1134         struct intel_gt *gt = arg;
1135         struct intel_rps *rps = &gt->rps;
1136         void (*saved_work)(struct work_struct *wrk);
1137         struct intel_engine_cs *engine;
1138         enum intel_engine_id id;
1139         struct igt_spinner spin;
1140         int err = 0;
1141
1142         /*
1143          * Our fundamental assumption is that running at lower frequency
1144          * actually saves power. Let's see if our RAPL measurement support
1145          * that theory.
1146          */
1147
1148         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1149                 return 0;
1150
1151         if (!librapl_supported(gt->i915))
1152                 return 0;
1153
1154         if (igt_spinner_init(&spin, gt))
1155                 return -ENOMEM;
1156
1157         intel_gt_pm_wait_for_idle(gt);
1158         saved_work = rps->work.func;
1159         rps->work.func = dummy_rps_work;
1160
1161         for_each_engine(engine, gt, id) {
1162                 struct i915_request *rq;
1163                 struct {
1164                         u64 power;
1165                         int freq;
1166                 } min, max;
1167
1168                 if (!intel_engine_can_store_dword(engine))
1169                         continue;
1170
1171                 st_engine_heartbeat_disable(engine);
1172
1173                 rq = igt_spinner_create_request(&spin,
1174                                                 engine->kernel_context,
1175                                                 MI_NOOP);
1176                 if (IS_ERR(rq)) {
1177                         st_engine_heartbeat_enable(engine);
1178                         err = PTR_ERR(rq);
1179                         break;
1180                 }
1181
1182                 i915_request_add(rq);
1183
1184                 if (!igt_wait_for_spinner(&spin, rq)) {
1185                         pr_err("%s: RPS spinner did not start\n",
1186                                engine->name);
1187                         igt_spinner_end(&spin);
1188                         st_engine_heartbeat_enable(engine);
1189                         intel_gt_set_wedged(engine->gt);
1190                         err = -EIO;
1191                         break;
1192                 }
1193
1194                 max.freq = rps->max_freq;
1195                 max.power = measure_power_at(rps, &max.freq);
1196
1197                 min.freq = rps->min_freq;
1198                 min.power = measure_power_at(rps, &min.freq);
1199
1200                 igt_spinner_end(&spin);
1201                 st_engine_heartbeat_enable(engine);
1202
1203                 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1204                         engine->name,
1205                         min.power, intel_gpu_freq(rps, min.freq),
1206                         max.power, intel_gpu_freq(rps, max.freq));
1207
1208                 if (10 * min.freq >= 9 * max.freq) {
1209                         pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1210                                   min.freq, intel_gpu_freq(rps, min.freq),
1211                                   max.freq, intel_gpu_freq(rps, max.freq));
1212                         continue;
1213                 }
1214
1215                 if (11 * min.power > 10 * max.power) {
1216                         pr_err("%s: did not conserve power when setting lower frequency!\n",
1217                                engine->name);
1218                         err = -EINVAL;
1219                         break;
1220                 }
1221
1222                 if (igt_flush_test(gt->i915)) {
1223                         err = -EIO;
1224                         break;
1225                 }
1226         }
1227
1228         igt_spinner_fini(&spin);
1229
1230         intel_gt_pm_wait_for_idle(gt);
1231         rps->work.func = saved_work;
1232
1233         return err;
1234 }
1235
1236 int live_rps_dynamic(void *arg)
1237 {
1238         struct intel_gt *gt = arg;
1239         struct intel_rps *rps = &gt->rps;
1240         struct intel_engine_cs *engine;
1241         enum intel_engine_id id;
1242         struct igt_spinner spin;
1243         int err = 0;
1244
1245         /*
1246          * We've looked at the bascs, and have established that we
1247          * can change the clock frequency and that the HW will generate
1248          * interrupts based on load. Now we check how we integrate those
1249          * moving parts into dynamic reclocking based on load.
1250          */
1251
1252         if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1253                 return 0;
1254
1255         if (igt_spinner_init(&spin, gt))
1256                 return -ENOMEM;
1257
1258         if (intel_rps_has_interrupts(rps))
1259                 pr_info("RPS has interrupt support\n");
1260         if (intel_rps_uses_timer(rps))
1261                 pr_info("RPS has timer support\n");
1262
1263         for_each_engine(engine, gt, id) {
1264                 struct i915_request *rq;
1265                 struct {
1266                         ktime_t dt;
1267                         u8 freq;
1268                 } min, max;
1269
1270                 if (!intel_engine_can_store_dword(engine))
1271                         continue;
1272
1273                 intel_gt_pm_wait_for_idle(gt);
1274                 GEM_BUG_ON(intel_rps_is_active(rps));
1275                 rps->cur_freq = rps->min_freq;
1276
1277                 intel_engine_pm_get(engine);
1278                 intel_rc6_disable(&gt->rc6);
1279                 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1280
1281                 rq = igt_spinner_create_request(&spin,
1282                                                 engine->kernel_context,
1283                                                 MI_NOOP);
1284                 if (IS_ERR(rq)) {
1285                         err = PTR_ERR(rq);
1286                         goto err;
1287                 }
1288
1289                 i915_request_add(rq);
1290
1291                 max.dt = ktime_get();
1292                 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1293                 max.dt = ktime_sub(ktime_get(), max.dt);
1294
1295                 igt_spinner_end(&spin);
1296
1297                 min.dt = ktime_get();
1298                 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1299                 min.dt = ktime_sub(ktime_get(), min.dt);
1300
1301                 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1302                         engine->name,
1303                         max.freq, intel_gpu_freq(rps, max.freq),
1304                         ktime_to_ns(max.dt),
1305                         min.freq, intel_gpu_freq(rps, min.freq),
1306                         ktime_to_ns(min.dt));
1307                 if (min.freq >= max.freq) {
1308                         pr_err("%s: dynamic reclocking of spinner failed\n!",
1309                                engine->name);
1310                         err = -EINVAL;
1311                 }
1312
1313 err:
1314                 intel_rc6_enable(&gt->rc6);
1315                 intel_engine_pm_put(engine);
1316
1317                 if (igt_flush_test(gt->i915))
1318                         err = -EIO;
1319                 if (err)
1320                         break;
1321         }
1322
1323         igt_spinner_fini(&spin);
1324
1325         return err;
1326 }
This page took 0.104772 seconds and 4 git commands to generate.