drivers/gpu/drm/i915/i915_pmu.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
         BIT(I915_SAMPLE_SEMA))

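/*
 * Userspace usage sketch (assuming this PMU registered under the name
 * "i915", see i915_pmu_register() below):
 *
 *   perf stat -a -e i915/actual-frequency/,i915/rc6-residency/ -I 1000
 *
 * The sampling timer fires at FREQUENCY (200) Hz, i.e. roughly every 5 ms,
 * and PERIOD clamps it to never fire more often than every 10 us.
 */
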
static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;

static u8 engine_config_sample(u64 config)
{
        return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
        return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
        return config < __I915_PMU_OTHER(0);
}

static unsigned int other_bit(const u64 config)
{
        unsigned int val;

        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
                break;
        case I915_PMU_REQUESTED_FREQUENCY:
                val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
                break;
        case I915_PMU_RC6_RESIDENCY:
                val = __I915_PMU_RC6_RESIDENCY_ENABLED;
                break;
        default:
                /*
                 * Events that do not require sampling, or tracking state
                 * transitions between enabled and disabled can be ignored.
                 */
                return -1;
        }

        return I915_ENGINE_SAMPLE_COUNT + val;
}

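/*
 * Note on the bit layout used in pmu->enable: the per-engine sample types
 * occupy the low I915_ENGINE_SAMPLE_COUNT bits, while the tracked "other"
 * events (frequency and RC6 residency) are packed immediately above them
 * by other_bit().
 */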
static unsigned int config_bit(const u64 config)
{
        if (is_engine_config(config))
                return engine_config_sample(config);
        else
                return other_bit(config);
}

static u64 config_mask(u64 config)
{
        return BIT_ULL(config_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
        return is_engine_config(event->attr.config);
}

static unsigned int event_bit(struct perf_event *event)
{
        return config_bit(event->attr.config);
}

static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        u32 enable;

        /*
         * Only some counters need the sampling timer.
         *
         * We start with a bitmask of all currently enabled events.
         */
        enable = pmu->enable;

        /*
         * Mask out all the ones which do not need the timer, or in
         * other words keep all the ones that could need the timer.
         */
        enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
                  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
                  ENGINE_SAMPLE_MASK;

        /*
         * When the GPU is idle per-engine counters do not need to be
         * running so clear those bits out.
         */
        if (!gpu_active)
                enable &= ~ENGINE_SAMPLE_MASK;
        /*
         * Also, if software busyness tracking is available, we do not
         * need the timer for the I915_SAMPLE_BUSY counter.
         */
        else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
                enable &= ~BIT(I915_SAMPLE_BUSY);

        /*
         * If some bits remain it means we need the sampling timer running.
         */
        return enable;
}

static u64 __get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        u64 val;

        val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);

        if (HAS_RC6p(i915))
                val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);

        if (HAS_RC6pp(i915))
                val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);

        return val;
}

static inline s64 ktime_since_raw(const ktime_t kt)
{
        return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}

static u64 get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;
        bool awake = false;
        u64 val;

        if (intel_gt_pm_get_if_awake(gt)) {
                val = __get_rc6(gt);
                intel_gt_pm_put_async(gt);
                awake = true;
        }

        spin_lock_irqsave(&pmu->lock, flags);

        if (awake) {
                pmu->sample[__I915_SAMPLE_RC6].cur = val;
        } else {
                /*
                 * We think we are runtime suspended.
                 *
                 * Report the delta from when the device was suspended to now,
                 * on top of the last known real value, as the approximated RC6
                 * counter value.
                 */
                val = ktime_since_raw(pmu->sleep_last);
                val += pmu->sample[__I915_SAMPLE_RC6].cur;
        }

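        /*
         * Clamp to the last reported value so the counter stays monotonic
         * for userspace even if the estimate above went backwards.
         */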
        if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
                val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
        else
                pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

        spin_unlock_irqrestore(&pmu->lock, flags);

        return val;
}

static void init_rc6(struct i915_pmu *pmu)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        intel_wakeref_t wakeref;

        with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
                pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
                pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
                                        pmu->sample[__I915_SAMPLE_RC6].cur;
                pmu->sleep_last = ktime_get_raw();
        }
}

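/*
 * Snapshot the RC6 counter and a timestamp when the GT parks, so that
 * get_rc6() can extrapolate residency while the device stays suspended.
 */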
static void park_rc6(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
        pmu->sleep_last = ktime_get_raw();
}

static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
        if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
                pmu->timer_enabled = true;
                pmu->timer_last = ktime_get();
                hrtimer_start_range_ns(&pmu->timer,
                                       ns_to_ktime(PERIOD), 0,
                                       HRTIMER_MODE_REL_PINNED);
        }
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        park_rc6(i915);

        /*
         * Signal sampling timer to stop if only engine events are enabled and
         * GPU went idle.
         */
        pmu->timer_enabled = pmu_needs_timer(pmu, false);

        spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        /*
         * Re-enable sampling timer when GPU goes active.
         */
        __i915_pmu_maybe_start_timer(pmu);

        spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
        sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
        /*
         * We have to avoid concurrent mmio cache line access on gen7 or
         * risk a machine hang. For a fun history lesson dig out the old
         * userspace intel_gpu_top and run it on Ivybridge or Haswell!
         */
        return GRAPHICS_VER(i915) == 7;
}

static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
        struct intel_engine_pmu *pmu = &engine->pmu;
        bool busy;
        u32 val;

        val = ENGINE_READ_FW(engine, RING_CTL);
        if (val == 0) /* powerwell off => engine idle */
                return;

        if (val & RING_WAIT)
                add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
        if (val & RING_WAIT_SEMAPHORE)
                add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

        /* No need to sample when busy stats are supported. */
        if (intel_engine_supports_stats(engine))
                return;

        /*
         * While waiting on a semaphore or event, MI_MODE reports the
         * ring as idle. However, previously using the seqno, and with
         * execlists sampling, we account for the ring waiting as the
         * engine being busy. Therefore, we record the sample as being
         * busy if either waiting or !idle.
         */
        busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
        if (!busy) {
                val = ENGINE_READ_FW(engine, RING_MI_MODE);
                busy = !(val & MODE_IDLE);
        }
        if (busy)
                add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long flags;

        if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
                return;

        if (!intel_gt_pm_is_awake(gt))
                return;

        for_each_engine(engine, gt, id) {
                if (!intel_engine_pm_get_if_awake(engine))
                        continue;

                if (exclusive_mmio_access(i915)) {
                        spin_lock_irqsave(&engine->uncore->lock, flags);
                        engine_sample(engine, period_ns);
                        spin_unlock_irqrestore(&engine->uncore->lock, flags);
                } else {
                        engine_sample(engine, period_ns);
                }

                intel_engine_pm_put_async(engine);
        }
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
        sample->cur += mul_u32_u32(val, mul);
}

static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
        return pmu->enable &
               (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
                config_mask(I915_PMU_REQUESTED_FREQUENCY));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_rps *rps = &gt->rps;

        if (!frequency_sampling_enabled(pmu))
                return;

        /* Report 0/0 (actual/requested) frequency while parked. */
        if (!intel_gt_pm_get_if_awake(gt))
                return;

        if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
                u32 val;

                /*
                 * We take a quick peek here without using forcewake
                 * so that we don't perturb the system under observation
                 * (forcewake => !rc6 => increased power use). We expect
                 * that if the read fails because it is outside of the
                 * mmio power well, then it will return 0 -- in which
                 * case we assume the system is running at the intended
                 * frequency. Fortunately, the read should rarely fail!
                 */
                val = intel_rps_read_actual_frequency_fw(rps);
                if (!val)
                        val = intel_gpu_freq(rps, rps->cur_freq);

                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
                                val, period_ns / 1000);
        }

        if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
                                intel_rps_get_requested_frequency(rps),
                                period_ns / 1000);
        }

        intel_gt_pm_put_async(gt);
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
        struct drm_i915_private *i915 =
                container_of(hrtimer, struct drm_i915_private, pmu.timer);
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_gt *gt = to_gt(i915);
        unsigned int period_ns;
        ktime_t now;

        if (!READ_ONCE(pmu->timer_enabled))
                return HRTIMER_NORESTART;

        now = ktime_get();
        period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
        pmu->timer_last = now;

        /*
         * Strictly speaking the passed in period may not be 100% accurate for
         * all internal calculations, since some amount of time can be spent on
         * grabbing the forcewake. However, the potential error from timer
         * callback delay greatly dominates this, so we keep it simple.
         */
        engines_sample(gt, period_ns);
        frequency_sample(gt, period_ns);

        hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

        return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);

        drm_WARN_ON(&i915->drm, event->parent);

        drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
                    enum drm_i915_pmu_engine_sample sample)
{
        switch (sample) {
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
                if (GRAPHICS_VER(engine->i915) < 6)
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
        struct intel_gt *gt = to_gt(i915);

        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
                        /* Requires a mutex for sampling! */
                        return -ENODEV;
                fallthrough;
        case I915_PMU_REQUESTED_FREQUENCY:
                if (GRAPHICS_VER(i915) < 6)
                        return -ENODEV;
                break;
        case I915_PMU_INTERRUPTS:
                break;
        case I915_PMU_RC6_RESIDENCY:
                if (!gt->rc6.supported)
                        return -ENODEV;
                break;
        case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
                break;
        default:
                return -ENOENT;
        }

        return 0;
}

static int engine_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;

        engine = intel_engine_lookup_user(i915, engine_event_class(event),
                                          engine_event_instance(event));
        if (!engine)
                return -ENODEV;

        return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;
        int ret;

        if (pmu->closed)
                return -ENODEV;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* unsupported modes and filters */
        if (event->attr.sample_period) /* no sampling */
                return -EINVAL;

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (event->cpu < 0)
                return -EINVAL;

        /* only allow running on one cpu at a time */
        if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
                return -EINVAL;

        if (is_engine_event(event))
                ret = engine_event_init(event);
        else
                ret = config_status(i915, event->attr.config);
        if (ret)
                return ret;

        if (!event->parent) {
                drm_dev_get(&i915->drm);
                event->destroy = i915_pmu_event_destroy;
        }

        return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;
        u64 val = 0;

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
                        /* Do nothing */
                } else if (sample == I915_SAMPLE_BUSY &&
                           intel_engine_supports_stats(engine)) {
                        ktime_t unused;

                        val = ktime_to_ns(intel_engine_get_busy_time(engine,
                                                                     &unused));
                } else {
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_REQUESTED_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_INTERRUPTS:
                        val = READ_ONCE(pmu->irq_count);
                        break;
                case I915_PMU_RC6_RESIDENCY:
                        val = get_rc6(to_gt(i915));
                        break;
                case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
                        val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
                        break;
                }
        }

        return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct hw_perf_event *hwc = &event->hw;
        struct i915_pmu *pmu = &i915->pmu;
        u64 prev, new;

        if (pmu->closed) {
                event->hw.state = PERF_HES_STOPPED;
                return;
        }
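        /*
         * Lockless update: re-read and retry via cmpxchg so that concurrent
         * readers each add only their own delta to event->count.
         */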
again:
        prev = local64_read(&hwc->prev_count);
        new = __i915_pmu_event_read(event);

        if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
                goto again;

        local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;
        unsigned int bit;

        bit = event_bit(event);
        if (bit == -1)
                goto update;

        spin_lock_irqsave(&pmu->lock, flags);

        /*
         * Update the bitmask of enabled events and increment
         * the event reference counter.
         */
        BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == ~0);

        pmu->enable |= BIT_ULL(bit);
        pmu->enable_count[bit]++;

        /*
         * Start the sampling timer if needed and not already enabled.
         */
        __i915_pmu_maybe_start_timer(pmu);

        /*
         * For per-engine events the bitmask and reference counting
         * is stored per engine.
         */
        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
                             I915_ENGINE_SAMPLE_COUNT);
                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
                             I915_ENGINE_SAMPLE_COUNT);
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

                engine->pmu.enable |= BIT(sample);
                engine->pmu.enable_count[sample]++;
        }

        spin_unlock_irqrestore(&pmu->lock, flags);

update:
        /*
         * Store the current counter value so we can report the correct delta
         * for all listeners. Even when the event was already enabled and has
         * an existing non-zero value.
         */
        local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_bit(event);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;

        if (bit == -1)
                return;

        spin_lock_irqsave(&pmu->lock, flags);

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

                /*
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
                if (--engine->pmu.enable_count[sample] == 0)
                        engine->pmu.enable &= ~BIT(sample);
        }

        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == 0);
        /*
         * Decrement the reference count and clear the enabled
         * bitmask when the last listener on an event goes away.
         */
        if (--pmu->enable_count[bit] == 0) {
                pmu->enable &= ~BIT_ULL(bit);
                pmu->timer_enabled &= pmu_needs_timer(pmu, true);
        }

        spin_unlock_irqrestore(&pmu->lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;

        if (pmu->closed)
                return;

        i915_pmu_enable(event);
        event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_UPDATE)
                i915_pmu_event_read(event);
        i915_pmu_disable(event);
        event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;

        if (pmu->closed)
                return -ENODEV;

        if (flags & PERF_EF_START)
                i915_pmu_event_start(event, flags);

        return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
        i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
        return 0;
}

struct i915_str_attribute {
        struct device_attribute attr;
        const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct i915_str_attribute *eattr;

        eattr = container_of(attr, struct i915_str_attribute, attr);
        return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
        (&((struct i915_str_attribute[]) { \
                { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
                  .str = _config, } \
        })[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
        I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
        NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
        .name = "format",
        .attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
        struct device_attribute attr;
        unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct i915_ext_attribute *eattr;

        eattr = container_of(attr, struct i915_ext_attribute, attr);
        return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *i915_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
        .attrs = i915_cpumask_attrs,
};

#define __event(__config, __name, __unit) \
{ \
        .config = (__config), \
        .name = (__name), \
        .unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
        .sample = (__sample), \
        .name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = i915_pmu_event_show;
        attr->val = config;

        return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
             const char *str)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = perf_event_sysfs_show;
        attr->event_str = str;

        return ++attr;
}

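/*
 * The attributes built below typically end up under
 * /sys/bus/event_source/devices/<pmu name>/events/, e.g. "rcs0-busy" and
 * "rcs0-busy.unit", which the perf tool parses to resolve event names.
 */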
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        static const struct {
                u64 config;
                const char *name;
                const char *unit;
        } events[] = {
                __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
                __event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
        };
        static const struct {
                enum drm_i915_pmu_engine_sample sample;
                char *name;
        } engine_events[] = {
                __engine_event(I915_SAMPLE_BUSY, "busy"),
                __engine_event(I915_SAMPLE_SEMA, "sema"),
                __engine_event(I915_SAMPLE_WAIT, "wait"),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
        struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
        struct attribute **attr = NULL, **attr_iter;
        struct intel_engine_cs *engine;
        unsigned int i;

        /* Count how many counters we will be exposing. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                if (!config_status(i915, events[i].config))
                        count++;
        }

        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        if (!engine_event_status(engine,
                                                 engine_events[i].sample))
                                count++;
                }
        }

        /* Allocate attribute objects and table. */
        i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
        if (!i915_attr)
                goto err_alloc;

        pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
        if (!pmu_attr)
                goto err_alloc;

        /* Max one pointer of each attribute type plus a termination entry. */
        attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
        if (!attr)
                goto err_alloc;

        i915_iter = i915_attr;
        pmu_iter = pmu_attr;
        attr_iter = attr;

        /* Initialize supported non-engine counters. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                char *str;

                if (config_status(i915, events[i].config))
                        continue;

                str = kstrdup(events[i].name, GFP_KERNEL);
                if (!str)
                        goto err;

                *attr_iter++ = &i915_iter->attr.attr;
                i915_iter = add_i915_attr(i915_iter, str, events[i].config);

                if (events[i].unit) {
                        str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
                }
        }

        /* Initialize supported engine counters. */
        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        char *str;

                        if (engine_event_status(engine,
                                                engine_events[i].sample))
                                continue;

                        str = kasprintf(GFP_KERNEL, "%s-%s",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &i915_iter->attr.attr;
                        i915_iter =
                                add_i915_attr(i915_iter, str,
                                              __I915_PMU_ENGINE(engine->uabi_class,
                                                                engine->uabi_instance,
                                                                engine_events[i].sample));

                        str = kasprintf(GFP_KERNEL, "%s-%s.unit",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
                }
        }

        pmu->i915_attr = i915_attr;
        pmu->pmu_attr = pmu_attr;

        return attr;

err:;
        for (attr_iter = attr; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

err_alloc:
        kfree(attr);
        kfree(i915_attr);
        kfree(pmu_attr);

        return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
        struct attribute **attr_iter = pmu->events_attr_group.attrs;

        for (; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

        kfree(pmu->events_attr_group.attrs);
        kfree(pmu->i915_attr);
        kfree(pmu->pmu_attr);

        pmu->events_attr_group.attrs = NULL;
        pmu->i915_attr = NULL;
        pmu->pmu_attr = NULL;
}

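/*
 * i915 events are system-wide rather than per-task, so all of them are
 * serviced on a single designated CPU. The hotplug callbacks below keep
 * that designation valid and migrate the perf context when the chosen
 * CPU goes offline.
 */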
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

        GEM_BUG_ON(!pmu->base.event_init);

        /* Select the first online CPU as a designated reader. */
        if (cpumask_empty(&i915_pmu_cpumask))
                cpumask_set_cpu(cpu, &i915_pmu_cpumask);

        return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
        unsigned int target = i915_pmu_target_cpu;

        GEM_BUG_ON(!pmu->base.event_init);

        /*
         * Unregistering an instance generates a CPU offline event which we must
         * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
         */
        if (pmu->closed)
                return 0;

        if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
                target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

                /* Migrate events if there is a valid target */
                if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &i915_pmu_cpumask);
                        i915_pmu_target_cpu = target;
                }
        }

        if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
                perf_pmu_migrate_context(&pmu->base, cpu, target);
                pmu->cpuhp.cpu = target;
        }

        return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

int i915_pmu_init(void)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                      "perf/x86/intel/i915:online",
                                      i915_pmu_cpu_online,
                                      i915_pmu_cpu_offline);
        if (ret < 0)
                pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
                          ret);
        else
                cpuhp_slot = ret;

        return 0;
}

void i915_pmu_exit(void)
{
        if (cpuhp_slot != CPUHP_INVALID)
                cpuhp_remove_multi_state(cpuhp_slot);
}

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
        if (cpuhp_slot == CPUHP_INVALID)
                return -EINVAL;

        return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
        cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static bool is_igp(struct drm_i915_private *i915)
{
        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);

        /* IGP is 0000:00:02.0 */
        return pci_domain_nr(pdev->bus) == 0 &&
               pdev->bus->number == 0 &&
               PCI_SLOT(pdev->devfn) == 2 &&
               PCI_FUNC(pdev->devfn) == 0;
}

void i915_pmu_register(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;
        const struct attribute_group *attr_groups[] = {
                &i915_pmu_format_attr_group,
                &pmu->events_attr_group,
                &i915_pmu_cpumask_attr_group,
                NULL
        };

        int ret = -ENOMEM;

        if (GRAPHICS_VER(i915) <= 2) {
                drm_info(&i915->drm, "PMU not supported for this GPU.");
                return;
        }

        spin_lock_init(&pmu->lock);
        hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        pmu->timer.function = i915_sample;
        pmu->cpuhp.cpu = -1;
        init_rc6(pmu);

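        /*
         * Non-integrated (discrete) GPUs get a per-device PMU name derived
         * from the PCI address (e.g. "i915_0000_03_00.0"), while the single
         * integrated GPU keeps the plain "i915" name.
         */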
        if (!is_igp(i915)) {
                pmu->name = kasprintf(GFP_KERNEL,
                                      "i915_%s",
                                      dev_name(i915->drm.dev));
                if (pmu->name) {
                        /* tools/perf reserves colons as special. */
                        strreplace((char *)pmu->name, ':', '_');
                }
        } else {
                pmu->name = "i915";
        }
        if (!pmu->name)
                goto err;

        pmu->events_attr_group.name = "events";
        pmu->events_attr_group.attrs = create_event_attributes(pmu);
        if (!pmu->events_attr_group.attrs)
                goto err_name;

        pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
                                        GFP_KERNEL);
        if (!pmu->base.attr_groups)
                goto err_attr;

        pmu->base.module        = THIS_MODULE;
        pmu->base.task_ctx_nr   = perf_invalid_context;
        pmu->base.event_init    = i915_pmu_event_init;
        pmu->base.add           = i915_pmu_event_add;
        pmu->base.del           = i915_pmu_event_del;
        pmu->base.start         = i915_pmu_event_start;
        pmu->base.stop          = i915_pmu_event_stop;
        pmu->base.read          = i915_pmu_event_read;
        pmu->base.event_idx     = i915_pmu_event_event_idx;

        ret = perf_pmu_register(&pmu->base, pmu->name, -1);
        if (ret)
                goto err_groups;

        ret = i915_pmu_register_cpuhp_state(pmu);
        if (ret)
                goto err_unreg;

        return;

err_unreg:
        perf_pmu_unregister(&pmu->base);
err_groups:
        kfree(pmu->base.attr_groups);
err_attr:
        pmu->base.event_init = NULL;
        free_event_attributes(pmu);
err_name:
        if (!is_igp(i915))
                kfree(pmu->name);
err:
        drm_notice(&i915->drm, "Failed to register PMU!\n");
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        /*
         * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu
         * ensures all currently executing ones will have exited before we
         * proceed with unregistration.
         */
        pmu->closed = true;
        synchronize_rcu();

        hrtimer_cancel(&pmu->timer);

        i915_pmu_unregister_cpuhp_state(pmu);

        perf_pmu_unregister(&pmu->base);
        pmu->base.event_init = NULL;
        kfree(pmu->base.attr_groups);
        if (!is_igp(i915))
                kfree(pmu->name);
        free_event_attributes(pmu);
}