drivers/gpu/drm/i915/i915_pmu.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6
7 #include "i915_pmu.h"
8 #include "intel_ringbuffer.h"
9 #include "i915_drv.h"
10
11 /* Frequency for the sampling timer for events which need it. */
12 #define FREQUENCY 200
13 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
14
15 #define ENGINE_SAMPLE_MASK \
16         (BIT(I915_SAMPLE_BUSY) | \
17          BIT(I915_SAMPLE_WAIT) | \
18          BIT(I915_SAMPLE_SEMA))
19
20 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
21
22 static cpumask_t i915_pmu_cpumask;
23
24 static u8 engine_config_sample(u64 config)
25 {
26         return config & I915_PMU_SAMPLE_MASK;
27 }
28
29 static u8 engine_event_sample(struct perf_event *event)
30 {
31         return engine_config_sample(event->attr.config);
32 }
33
34 static u8 engine_event_class(struct perf_event *event)
35 {
36         return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
37 }
38
39 static u8 engine_event_instance(struct perf_event *event)
40 {
41         return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
42 }
43
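/*
 * A rough sketch of how an engine event config is packed, mirroring the
 * __I915_PMU_ENGINE() macro in include/uapi/drm/i915_drm.h (the uapi
 * header is authoritative, the widths here are only illustrative):
 *
 *	config = ((u64)class    << I915_PMU_CLASS_SHIFT) |
 *		 ((u64)instance << I915_PMU_SAMPLE_BITS)  |
 *		 sample;
 *
 * which the helpers above decode again as:
 *
 *	sample   = config & I915_PMU_SAMPLE_MASK;
 *	instance = (config >> I915_PMU_SAMPLE_BITS) & 0xff;
 *	class    = (config >> I915_PMU_CLASS_SHIFT) & 0xff;
 */
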
44 static bool is_engine_config(u64 config)
45 {
46         return config < __I915_PMU_OTHER(0);
47 }
48
49 static unsigned int config_enabled_bit(u64 config)
50 {
51         if (is_engine_config(config))
52                 return engine_config_sample(config);
53         else
54                 return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
55 }
56
57 static u64 config_enabled_mask(u64 config)
58 {
59         return BIT_ULL(config_enabled_bit(config));
60 }
61
62 static bool is_engine_event(struct perf_event *event)
63 {
64         return is_engine_config(event->attr.config);
65 }
66
67 static unsigned int event_enabled_bit(struct perf_event *event)
68 {
69         return config_enabled_bit(event->attr.config);
70 }
71
72 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
73 {
74         u64 enable;
75
76         /*
77          * Only some counters need the sampling timer.
78          *
79          * We start with a bitmask of all currently enabled events.
80          */
81         enable = i915->pmu.enable;
82
83         /*
84          * Mask out all the ones which do not need the timer, or in
85          * other words keep all the ones that could need the timer.
86          */
87         enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
88                   config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
89                   ENGINE_SAMPLE_MASK;
90
91         /*
92          * When the GPU is idle per-engine counters do not need to be
93          * running so clear those bits out.
94          */
95         if (!gpu_active)
96                 enable &= ~ENGINE_SAMPLE_MASK;
97         /*
98          * Also, if software busyness tracking is available we do not
99          * need the timer for the I915_SAMPLE_BUSY counter.
100          *
101          * Use RCS as proxy for all engines.
102          */
103         else if (intel_engine_supports_stats(i915->engine[RCS]))
104                 enable &= ~BIT(I915_SAMPLE_BUSY);
105
106         /*
107          * If some bits remain it means we need the sampling timer running.
108          */
109         return enable;
110 }
111
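/*
 * To illustrate the masking above with a couple of examples: if only
 * I915_PMU_INTERRUPTS or I915_PMU_RC6_RESIDENCY are enabled, no bits
 * survive and the timer stays off; the two frequency counters keep the
 * timer running for as long as they are enabled; the engine sample bits
 * keep it running only while the GPU is active (and, for
 * I915_SAMPLE_BUSY, only when software busyness stats are unavailable).
 */
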
112 void i915_pmu_gt_parked(struct drm_i915_private *i915)
113 {
114         if (!i915->pmu.base.event_init)
115                 return;
116
117         spin_lock_irq(&i915->pmu.lock);
118         /*
119          * Signal sampling timer to stop if only engine events are enabled and
120          * GPU went idle.
121          */
122         i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
123         spin_unlock_irq(&i915->pmu.lock);
124 }
125
126 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
127 {
128         if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
129                 i915->pmu.timer_enabled = true;
130                 hrtimer_start_range_ns(&i915->pmu.timer,
131                                        ns_to_ktime(PERIOD), 0,
132                                        HRTIMER_MODE_REL_PINNED);
133         }
134 }
135
136 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
137 {
138         if (!i915->pmu.base.event_init)
139                 return;
140
141         spin_lock_irq(&i915->pmu.lock);
142         /*
143          * Re-enable sampling timer when GPU goes active.
144          */
145         __i915_pmu_maybe_start_timer(i915);
146         spin_unlock_irq(&i915->pmu.lock);
147 }
148
149 static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
150 {
151         if (!fw)
152                 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
153
154         return true;
155 }
156
157 static void
158 update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
159 {
160         sample->cur += mul_u32_u32(val, unit);
161 }
162
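/*
 * A worked example of the accumulation scheme (values illustrative):
 * the sampling timer fires at FREQUENCY (200 Hz), i.e. every PERIOD
 * (5,000,000 ns). Engine samples below pass unit == PERIOD with val 0
 * or 1, so sample->cur approximates time in nanoseconds - three "busy"
 * ticks add 3 * 5,000,000 ns. Frequency samples pass unit == 1 with val
 * in MHz, so sample->cur accumulates one reading per tick and is
 * divided back by FREQUENCY in __i915_pmu_event_read().
 */
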
163 static void engines_sample(struct drm_i915_private *dev_priv)
164 {
165         struct intel_engine_cs *engine;
166         enum intel_engine_id id;
167         bool fw = false;
168
169         if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
170                 return;
171
172         if (!dev_priv->gt.awake)
173                 return;
174
175         if (!intel_runtime_pm_get_if_in_use(dev_priv))
176                 return;
177
178         for_each_engine(engine, dev_priv, id) {
179                 u32 current_seqno = intel_engine_get_seqno(engine);
180                 u32 last_seqno = intel_engine_last_submit(engine);
181                 u32 val;
182
183                 val = !i915_seqno_passed(current_seqno, last_seqno);
184
185                 update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
186                               PERIOD, val);
187
188                 if (val && (engine->pmu.enable &
189                     (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
190                         fw = grab_forcewake(dev_priv, fw);
191
192                         val = I915_READ_FW(RING_CTL(engine->mmio_base));
193                 } else {
194                         val = 0;
195                 }
196
197                 update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
198                               PERIOD, !!(val & RING_WAIT));
199
200                 update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
201                               PERIOD, !!(val & RING_WAIT_SEMAPHORE));
202         }
203
204         if (fw)
205                 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
206
207         intel_runtime_pm_put(dev_priv);
208 }
209
210 static void frequency_sample(struct drm_i915_private *dev_priv)
211 {
212         if (dev_priv->pmu.enable &
213             config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
214                 u32 val;
215
216                 val = dev_priv->gt_pm.rps.cur_freq;
217                 if (dev_priv->gt.awake &&
218                     intel_runtime_pm_get_if_in_use(dev_priv)) {
219                         val = intel_get_cagf(dev_priv,
220                                              I915_READ_NOTRACE(GEN6_RPSTAT1));
221                         intel_runtime_pm_put(dev_priv);
222                 }
223
224                 update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
225                               1, intel_gpu_freq(dev_priv, val));
226         }
227
228         if (dev_priv->pmu.enable &
229             config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
230                 update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
231                               intel_gpu_freq(dev_priv,
232                                              dev_priv->gt_pm.rps.cur_freq));
233         }
234 }
235
236 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
237 {
238         struct drm_i915_private *i915 =
239                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
240
241         if (!READ_ONCE(i915->pmu.timer_enabled))
242                 return HRTIMER_NORESTART;
243
244         engines_sample(i915);
245         frequency_sample(i915);
246
247         hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
248         return HRTIMER_RESTART;
249 }
250
251 static u64 count_interrupts(struct drm_i915_private *i915)
252 {
253         /* open-coded kstat_irqs() */
254         struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
255         u64 sum = 0;
256         int cpu;
257
258         if (!desc || !desc->kstat_irqs)
259                 return 0;
260
261         for_each_possible_cpu(cpu)
262                 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
263
264         return sum;
265 }
266
267 static void engine_event_destroy(struct perf_event *event)
268 {
269         struct drm_i915_private *i915 =
270                 container_of(event->pmu, typeof(*i915), pmu.base);
271         struct intel_engine_cs *engine;
272
273         engine = intel_engine_lookup_user(i915,
274                                           engine_event_class(event),
275                                           engine_event_instance(event));
276         if (WARN_ON_ONCE(!engine))
277                 return;
278
279         if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
280             intel_engine_supports_stats(engine))
281                 intel_disable_engine_stats(engine);
282 }
283
284 static void i915_pmu_event_destroy(struct perf_event *event)
285 {
286         WARN_ON(event->parent);
287
288         if (is_engine_event(event))
289                 engine_event_destroy(event);
290 }
291
292 static int
293 engine_event_status(struct intel_engine_cs *engine,
294                     enum drm_i915_pmu_engine_sample sample)
295 {
296         switch (sample) {
297         case I915_SAMPLE_BUSY:
298         case I915_SAMPLE_WAIT:
299                 break;
300         case I915_SAMPLE_SEMA:
301                 if (INTEL_GEN(engine->i915) < 6)
302                         return -ENODEV;
303                 break;
304         default:
305                 return -ENOENT;
306         }
307
308         return 0;
309 }
310
311 static int
312 config_status(struct drm_i915_private *i915, u64 config)
313 {
314         switch (config) {
315         case I915_PMU_ACTUAL_FREQUENCY:
316                 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
317                         /* Requires a mutex for sampling! */
318                         return -ENODEV;
319                 /* Fall-through. */
320         case I915_PMU_REQUESTED_FREQUENCY:
321                 if (INTEL_GEN(i915) < 6)
322                         return -ENODEV;
323                 break;
324         case I915_PMU_INTERRUPTS:
325                 break;
326         case I915_PMU_RC6_RESIDENCY:
327                 if (!HAS_RC6(i915))
328                         return -ENODEV;
329                 break;
330         default:
331                 return -ENOENT;
332         }
333
334         return 0;
335 }
336
337 static int engine_event_init(struct perf_event *event)
338 {
339         struct drm_i915_private *i915 =
340                 container_of(event->pmu, typeof(*i915), pmu.base);
341         struct intel_engine_cs *engine;
342         u8 sample;
343         int ret;
344
345         engine = intel_engine_lookup_user(i915, engine_event_class(event),
346                                           engine_event_instance(event));
347         if (!engine)
348                 return -ENODEV;
349
350         sample = engine_event_sample(event);
351         ret = engine_event_status(engine, sample);
352         if (ret)
353                 return ret;
354
355         if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
356                 ret = intel_enable_engine_stats(engine);
357
358         return ret;
359 }
360
361 static int i915_pmu_event_init(struct perf_event *event)
362 {
363         struct drm_i915_private *i915 =
364                 container_of(event->pmu, typeof(*i915), pmu.base);
365         int ret;
366
367         if (event->attr.type != event->pmu->type)
368                 return -ENOENT;
369
370         /* unsupported modes and filters */
371         if (event->attr.sample_period) /* no sampling */
372                 return -EINVAL;
373
374         if (has_branch_stack(event))
375                 return -EOPNOTSUPP;
376
377         if (event->cpu < 0)
378                 return -EINVAL;
379
380         /* only allow running on one cpu at a time */
381         if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
382                 return -EINVAL;
383
384         if (is_engine_event(event))
385                 ret = engine_event_init(event);
386         else
387                 ret = config_status(i915, event->attr.config);
388         if (ret)
389                 return ret;
390
391         if (!event->parent)
392                 event->destroy = i915_pmu_event_destroy;
393
394         return 0;
395 }
396
397 static u64 __get_rc6(struct drm_i915_private *i915)
398 {
399         u64 val;
400
401         val = intel_rc6_residency_ns(i915,
402                                      IS_VALLEYVIEW(i915) ?
403                                      VLV_GT_RENDER_RC6 :
404                                      GEN6_GT_GFX_RC6);
405
406         if (HAS_RC6p(i915))
407                 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
408
409         if (HAS_RC6pp(i915))
410                 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
411
412         return val;
413 }
414
415 static u64 get_rc6(struct drm_i915_private *i915)
416 {
417 #if IS_ENABLED(CONFIG_PM)
418         unsigned long flags;
419         u64 val;
420
421         if (intel_runtime_pm_get_if_in_use(i915)) {
422                 val = __get_rc6(i915);
423                 intel_runtime_pm_put(i915);
424
425                 /*
426                  * If we are coming back from being runtime suspended we must
427                  * be careful not to report a larger value than returned
428                  * previously.
429                  */
430
431                 spin_lock_irqsave(&i915->pmu.lock, flags);
432
433                 if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
434                         i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
435                         i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
436                 } else {
437                         val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
438                 }
439
440                 spin_unlock_irqrestore(&i915->pmu.lock, flags);
441         } else {
442                 struct pci_dev *pdev = i915->drm.pdev;
443                 struct device *kdev = &pdev->dev;
444
445                 /*
446                  * We are runtime suspended.
447                  *
448                  * Report the delta from when the device was suspended to now,
449                  * on top of the last known real value, as the approximated RC6
450                  * counter value.
451                  */
452                 spin_lock_irqsave(&i915->pmu.lock, flags);
453                 spin_lock(&kdev->power.lock);
454
455                  * Even though intel_runtime_pm_get_if_in_use failed above to
456                  * get the runtime PM reference, we cannot assume we are in
457                  * runtime suspend, since we can either: a) race with coming
458                  * out of it before we took the power.lock, or b) be in one of
459                  * the other runtime PM states which can also bring us here.
460                  * states than suspended which can bring us here.
461                  *
462                  * We need to double-check that we are indeed currently runtime
463                  * suspended and if not we cannot do better than report the last
464                  * known RC6 value.
465                  */
466                 if (kdev->power.runtime_status == RPM_SUSPENDED) {
467                         if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
468                                 i915->pmu.suspended_jiffies_last =
469                                                   kdev->power.suspended_jiffies;
470
471                         val = kdev->power.suspended_jiffies -
472                               i915->pmu.suspended_jiffies_last;
473                         val += jiffies - kdev->power.accounting_timestamp;
474
475                         val = jiffies_to_nsecs(val);
476                         val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
477
478                         i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
479                 } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
480                         val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
481                 } else {
482                         val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
483                 }
484
485                 spin_unlock(&kdev->power.lock);
486                 spin_unlock_irqrestore(&i915->pmu.lock, flags);
487         }
488
489         return val;
490 #else
491         return __get_rc6(i915);
492 #endif
493 }
494
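/*
 * Put differently, while runtime suspended the GPU is assumed to be in
 * RC6 the whole time, so the value reported above is roughly:
 *
 *	estimate = last real RC6 readout +
 *		   jiffies_to_nsecs(jiffies spent runtime suspended since
 *				    that readout);
 *
 * with the awake path clamping against the estimate so the counter
 * never appears to go backwards once real readouts resume.
 */
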
495 static u64 __i915_pmu_event_read(struct perf_event *event)
496 {
497         struct drm_i915_private *i915 =
498                 container_of(event->pmu, typeof(*i915), pmu.base);
499         u64 val = 0;
500
501         if (is_engine_event(event)) {
502                 u8 sample = engine_event_sample(event);
503                 struct intel_engine_cs *engine;
504
505                 engine = intel_engine_lookup_user(i915,
506                                                   engine_event_class(event),
507                                                   engine_event_instance(event));
508
509                 if (WARN_ON_ONCE(!engine)) {
510                         /* Do nothing */
511                 } else if (sample == I915_SAMPLE_BUSY &&
512                            intel_engine_supports_stats(engine)) {
513                         val = ktime_to_ns(intel_engine_get_busy_time(engine));
514                 } else {
515                         val = engine->pmu.sample[sample].cur;
516                 }
517         } else {
518                 switch (event->attr.config) {
519                 case I915_PMU_ACTUAL_FREQUENCY:
520                         val =
521                            div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
522                                    FREQUENCY);
523                         break;
524                 case I915_PMU_REQUESTED_FREQUENCY:
525                         val =
526                            div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
527                                    FREQUENCY);
528                         break;
529                 case I915_PMU_INTERRUPTS:
530                         val = count_interrupts(i915);
531                         break;
532                 case I915_PMU_RC6_RESIDENCY:
533                         val = get_rc6(i915);
534                         break;
535                 }
536         }
537
538         return val;
539 }
540
541 static void i915_pmu_event_read(struct perf_event *event)
542 {
543         struct hw_perf_event *hwc = &event->hw;
544         u64 prev, new;
545
546 again:
547         prev = local64_read(&hwc->prev_count);
548         new = __i915_pmu_event_read(event);
549
550         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
551                 goto again;
552
553         local64_add(new - prev, &event->count);
554 }
555
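/*
 * A sketch of the delta accounting above (numbers illustrative):
 * prev_count holds the last value published to perf, each read
 * publishes the new value with a cmpxchg (retrying if another reader
 * raced us) and adds only the difference to event->count, so concurrent
 * readers never double count:
 *
 *	prev = local64_read(&hwc->prev_count);		// e.g. 1000
 *	new  = __i915_pmu_event_read(event);		// e.g. 1500
 *	local64_cmpxchg(&hwc->prev_count, prev, new);	// publish 1500
 *	local64_add(new - prev, &event->count);		// += 500
 */
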
556 static void i915_pmu_enable(struct perf_event *event)
557 {
558         struct drm_i915_private *i915 =
559                 container_of(event->pmu, typeof(*i915), pmu.base);
560         unsigned int bit = event_enabled_bit(event);
561         unsigned long flags;
562
563         spin_lock_irqsave(&i915->pmu.lock, flags);
564
565         /*
566          * Update the bitmask of enabled events and increment
567          * the event reference counter.
568          */
569         GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
570         GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
571         i915->pmu.enable |= BIT_ULL(bit);
572         i915->pmu.enable_count[bit]++;
573
574         /*
575          * Start the sampling timer if needed and not already enabled.
576          */
577         __i915_pmu_maybe_start_timer(i915);
578
579         /*
580          * For per-engine events the bitmask and reference counting
581          * is stored per engine.
582          */
583         if (is_engine_event(event)) {
584                 u8 sample = engine_event_sample(event);
585                 struct intel_engine_cs *engine;
586
587                 engine = intel_engine_lookup_user(i915,
588                                                   engine_event_class(event),
589                                                   engine_event_instance(event));
590                 GEM_BUG_ON(!engine);
591                 engine->pmu.enable |= BIT(sample);
592
593                 GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
594                 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
595                 engine->pmu.enable_count[sample]++;
596         }
597
598         spin_unlock_irqrestore(&i915->pmu.lock, flags);
599
600         /*
601          * Store the current counter value so we can report the correct delta
602          * for all listeners, even when the event was already enabled and
603          * has an existing non-zero value.
604          */
605         local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
606 }
607
608 static void i915_pmu_disable(struct perf_event *event)
609 {
610         struct drm_i915_private *i915 =
611                 container_of(event->pmu, typeof(*i915), pmu.base);
612         unsigned int bit = event_enabled_bit(event);
613         unsigned long flags;
614
615         spin_lock_irqsave(&i915->pmu.lock, flags);
616
617         if (is_engine_event(event)) {
618                 u8 sample = engine_event_sample(event);
619                 struct intel_engine_cs *engine;
620
621                 engine = intel_engine_lookup_user(i915,
622                                                   engine_event_class(event),
623                                                   engine_event_instance(event));
624                 GEM_BUG_ON(!engine);
625                 GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
626                 GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
627                 /*
628                  * Decrement the reference count and clear the enabled
629                  * bitmask when the last listener on an event goes away.
630                  */
631                 if (--engine->pmu.enable_count[sample] == 0)
632                         engine->pmu.enable &= ~BIT(sample);
633         }
634
635         GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
636         GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
637         /*
638          * Decrement the reference count and clear the enabled
639          * bitmask when the last listener on an event goes away.
640          */
641         if (--i915->pmu.enable_count[bit] == 0) {
642                 i915->pmu.enable &= ~BIT_ULL(bit);
643                 i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
644         }
645
646         spin_unlock_irqrestore(&i915->pmu.lock, flags);
647 }
648
649 static void i915_pmu_event_start(struct perf_event *event, int flags)
650 {
651         i915_pmu_enable(event);
652         event->hw.state = 0;
653 }
654
655 static void i915_pmu_event_stop(struct perf_event *event, int flags)
656 {
657         if (flags & PERF_EF_UPDATE)
658                 i915_pmu_event_read(event);
659         i915_pmu_disable(event);
660         event->hw.state = PERF_HES_STOPPED;
661 }
662
663 static int i915_pmu_event_add(struct perf_event *event, int flags)
664 {
665         if (flags & PERF_EF_START)
666                 i915_pmu_event_start(event, flags);
667
668         return 0;
669 }
670
671 static void i915_pmu_event_del(struct perf_event *event, int flags)
672 {
673         i915_pmu_event_stop(event, PERF_EF_UPDATE);
674 }
675
676 static int i915_pmu_event_event_idx(struct perf_event *event)
677 {
678         return 0;
679 }
680
681 struct i915_str_attribute {
682         struct device_attribute attr;
683         const char *str;
684 };
685
686 static ssize_t i915_pmu_format_show(struct device *dev,
687                                     struct device_attribute *attr, char *buf)
688 {
689         struct i915_str_attribute *eattr;
690
691         eattr = container_of(attr, struct i915_str_attribute, attr);
692         return sprintf(buf, "%s\n", eattr->str);
693 }
694
695 #define I915_PMU_FORMAT_ATTR(_name, _config) \
696         (&((struct i915_str_attribute[]) { \
697                 { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
698                   .str = _config, } \
699         })[0].attr.attr)
700
701 static struct attribute *i915_pmu_format_attrs[] = {
702         I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
703         NULL,
704 };
705
706 static const struct attribute_group i915_pmu_format_attr_group = {
707         .name = "format",
708         .attrs = i915_pmu_format_attrs,
709 };
710
711 struct i915_ext_attribute {
712         struct device_attribute attr;
713         unsigned long val;
714 };
715
716 static ssize_t i915_pmu_event_show(struct device *dev,
717                                    struct device_attribute *attr, char *buf)
718 {
719         struct i915_ext_attribute *eattr;
720
721         eattr = container_of(attr, struct i915_ext_attribute, attr);
722         return sprintf(buf, "config=0x%lx\n", eattr->val);
723 }
724
725 static struct attribute_group i915_pmu_events_attr_group = {
726         .name = "events",
727         /* Patch in attrs at runtime. */
728 };
729
730 static ssize_t
731 i915_pmu_get_attr_cpumask(struct device *dev,
732                           struct device_attribute *attr,
733                           char *buf)
734 {
735         return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
736 }
737
738 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
739
740 static struct attribute *i915_cpumask_attrs[] = {
741         &dev_attr_cpumask.attr,
742         NULL,
743 };
744
745 static const struct attribute_group i915_pmu_cpumask_attr_group = {
746         .attrs = i915_cpumask_attrs,
747 };
748
749 static const struct attribute_group *i915_pmu_attr_groups[] = {
750         &i915_pmu_format_attr_group,
751         &i915_pmu_events_attr_group,
752         &i915_pmu_cpumask_attr_group,
753         NULL
754 };
755
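/*
 * With the groups above registered, the PMU is expected to appear in
 * sysfs roughly as follows (a sketch; the event names and config values
 * depend on the platform and the engines present):
 *
 *	/sys/bus/event_source/devices/i915/
 *		cpumask				e.g. "0"
 *		format/i915_eventid		"config:0-20"
 *		events/rc6-residency		"config=0x..."
 *		events/rc6-residency.unit	"ns"
 *		events/rcs0-busy		"config=0x..."
 *		events/rcs0-busy.unit		"ns"
 */
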
756 #define __event(__config, __name, __unit) \
757 { \
758         .config = (__config), \
759         .name = (__name), \
760         .unit = (__unit), \
761 }
762
763 #define __engine_event(__sample, __name) \
764 { \
765         .sample = (__sample), \
766         .name = (__name), \
767 }
768
769 static struct i915_ext_attribute *
770 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
771 {
772         sysfs_attr_init(&attr->attr.attr);
773         attr->attr.attr.name = name;
774         attr->attr.attr.mode = 0444;
775         attr->attr.show = i915_pmu_event_show;
776         attr->val = config;
777
778         return ++attr;
779 }
780
781 static struct perf_pmu_events_attr *
782 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
783              const char *str)
784 {
785         sysfs_attr_init(&attr->attr.attr);
786         attr->attr.attr.name = name;
787         attr->attr.attr.mode = 0444;
788         attr->attr.show = perf_event_sysfs_show;
789         attr->event_str = str;
790
791         return ++attr;
792 }
793
794 static struct attribute **
795 create_event_attributes(struct drm_i915_private *i915)
796 {
797         static const struct {
798                 u64 config;
799                 const char *name;
800                 const char *unit;
801         } events[] = {
802                 __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
803                 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
804                 __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
805                 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
806         };
807         static const struct {
808                 enum drm_i915_pmu_engine_sample sample;
809                 char *name;
810         } engine_events[] = {
811                 __engine_event(I915_SAMPLE_BUSY, "busy"),
812                 __engine_event(I915_SAMPLE_SEMA, "sema"),
813                 __engine_event(I915_SAMPLE_WAIT, "wait"),
814         };
815         unsigned int count = 0;
816         struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
817         struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
818         struct attribute **attr = NULL, **attr_iter;
819         struct intel_engine_cs *engine;
820         enum intel_engine_id id;
821         unsigned int i;
822
823         /* Count how many counters we will be exposing. */
824         for (i = 0; i < ARRAY_SIZE(events); i++) {
825                 if (!config_status(i915, events[i].config))
826                         count++;
827         }
828
829         for_each_engine(engine, i915, id) {
830                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
831                         if (!engine_event_status(engine,
832                                                  engine_events[i].sample))
833                                 count++;
834                 }
835         }
836
837         /* Allocate attribute objects and table. */
838         i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
839         if (!i915_attr)
840                 goto err_alloc;
841
842         pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
843         if (!pmu_attr)
844                 goto err_alloc;
845
846         /* At most two pointers per counter (event + unit) plus a terminator. */
847         attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
848         if (!attr)
849                 goto err_alloc;
850
851         i915_iter = i915_attr;
852         pmu_iter = pmu_attr;
853         attr_iter = attr;
854
855         /* Initialize supported non-engine counters. */
856         for (i = 0; i < ARRAY_SIZE(events); i++) {
857                 char *str;
858
859                 if (config_status(i915, events[i].config))
860                         continue;
861
862                 str = kstrdup(events[i].name, GFP_KERNEL);
863                 if (!str)
864                         goto err;
865
866                 *attr_iter++ = &i915_iter->attr.attr;
867                 i915_iter = add_i915_attr(i915_iter, str, events[i].config);
868
869                 if (events[i].unit) {
870                         str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
871                         if (!str)
872                                 goto err;
873
874                         *attr_iter++ = &pmu_iter->attr.attr;
875                         pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
876                 }
877         }
878
879         /* Initialize supported engine counters. */
880         for_each_engine(engine, i915, id) {
881                 for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
882                         char *str;
883
884                         if (engine_event_status(engine,
885                                                 engine_events[i].sample))
886                                 continue;
887
888                         str = kasprintf(GFP_KERNEL, "%s-%s",
889                                         engine->name, engine_events[i].name);
890                         if (!str)
891                                 goto err;
892
893                         *attr_iter++ = &i915_iter->attr.attr;
894                         i915_iter =
895                                 add_i915_attr(i915_iter, str,
896                                               __I915_PMU_ENGINE(engine->uabi_class,
897                                                                 engine->instance,
898                                                                 engine_events[i].sample));
899
900                         str = kasprintf(GFP_KERNEL, "%s-%s.unit",
901                                         engine->name, engine_events[i].name);
902                         if (!str)
903                                 goto err;
904
905                         *attr_iter++ = &pmu_iter->attr.attr;
906                         pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
907                 }
908         }
909
910         i915->pmu.i915_attr = i915_attr;
911         i915->pmu.pmu_attr = pmu_attr;
912
913         return attr;
914
915 err:;
916         for (attr_iter = attr; *attr_iter; attr_iter++)
917                 kfree((*attr_iter)->name);
918
919 err_alloc:
920         kfree(attr);
921         kfree(i915_attr);
922         kfree(pmu_attr);
923
924         return NULL;
925 }
926
927 static void free_event_attributes(struct drm_i915_private *i915)
928 {
929         struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
930
931         for (; *attr_iter; attr_iter++)
932                 kfree((*attr_iter)->name);
933
934         kfree(i915_pmu_events_attr_group.attrs);
935         kfree(i915->pmu.i915_attr);
936         kfree(i915->pmu.pmu_attr);
937
938         i915_pmu_events_attr_group.attrs = NULL;
939         i915->pmu.i915_attr = NULL;
940         i915->pmu.pmu_attr = NULL;
941 }
942
943 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
944 {
945         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
946
947         GEM_BUG_ON(!pmu->base.event_init);
948
949         /* Select the first online CPU as a designated reader. */
950         if (!cpumask_weight(&i915_pmu_cpumask))
951                 cpumask_set_cpu(cpu, &i915_pmu_cpumask);
952
953         return 0;
954 }
955
956 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
957 {
958         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
959         unsigned int target;
960
961         GEM_BUG_ON(!pmu->base.event_init);
962
963         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
964                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
965                 /* Migrate events if there is a valid target */
966                 if (target < nr_cpu_ids) {
967                         cpumask_set_cpu(target, &i915_pmu_cpumask);
968                         perf_pmu_migrate_context(&pmu->base, cpu, target);
969                 }
970         }
971
972         return 0;
973 }
974
975 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
976
977 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
978 {
979         enum cpuhp_state slot;
980         int ret;
981
982         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
983                                       "perf/x86/intel/i915:online",
984                                       i915_pmu_cpu_online,
985                                       i915_pmu_cpu_offline);
986         if (ret < 0)
987                 return ret;
988
989         slot = ret;
990         ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
991         if (ret) {
992                 cpuhp_remove_multi_state(slot);
993                 return ret;
994         }
995
996         cpuhp_slot = slot;
997         return 0;
998 }
999
1000 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
1001 {
1002         WARN_ON(cpuhp_slot == CPUHP_INVALID);
1003         WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
1004         cpuhp_remove_multi_state(cpuhp_slot);
1005 }
1006
1007 void i915_pmu_register(struct drm_i915_private *i915)
1008 {
1009         int ret;
1010
1011         if (INTEL_GEN(i915) <= 2) {
1012                 DRM_INFO("PMU not supported for this GPU.\n");
1013                 return;
1014         }
1015
1016         i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
1017         if (!i915_pmu_events_attr_group.attrs) {
1018                 ret = -ENOMEM;
1019                 goto err;
1020         }
1021
1022         i915->pmu.base.attr_groups      = i915_pmu_attr_groups;
1023         i915->pmu.base.task_ctx_nr      = perf_invalid_context;
1024         i915->pmu.base.event_init       = i915_pmu_event_init;
1025         i915->pmu.base.add              = i915_pmu_event_add;
1026         i915->pmu.base.del              = i915_pmu_event_del;
1027         i915->pmu.base.start            = i915_pmu_event_start;
1028         i915->pmu.base.stop             = i915_pmu_event_stop;
1029         i915->pmu.base.read             = i915_pmu_event_read;
1030         i915->pmu.base.event_idx        = i915_pmu_event_event_idx;
1031
1032         spin_lock_init(&i915->pmu.lock);
1033         hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1034         i915->pmu.timer.function = i915_sample;
1035
1036         ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
1037         if (ret)
1038                 goto err;
1039
1040         ret = i915_pmu_register_cpuhp_state(i915);
1041         if (ret)
1042                 goto err_unreg;
1043
1044         return;
1045
1046 err_unreg:
1047         perf_pmu_unregister(&i915->pmu.base);
1048 err:
1049         i915->pmu.base.event_init = NULL;
1050         free_event_attributes(i915);
1051         DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1052 }
1053
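/*
 * A minimal userspace sketch of consuming the PMU registered above, via
 * the standard perf interfaces rather than anything i915 specific (error
 * handling trimmed; assumes I915_PMU_RC6_RESIDENCY is available from a
 * recent <drm/i915_drm.h>, otherwise take the config value from the
 * events directory in sysfs). The perf tool equivalent would be
 * something like:
 *
 *	perf stat -e i915/rc6-residency/ -a sleep 1
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/perf_event.h>
 *	#include <drm/i915_drm.h>
 *
 *	int main(void)
 *	{
 *		struct perf_event_attr attr;
 *		unsigned long long count;
 *		int type, fd;
 *		FILE *f;
 *
 *		// The dynamically assigned PMU type is exported via sysfs.
 *		f = fopen("/sys/bus/event_source/devices/i915/type", "r");
 *		if (!f || fscanf(f, "%d", &type) != 1)
 *			return 1;
 *		fclose(f);
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.type = type;
 *		attr.size = sizeof(attr);
 *		attr.config = I915_PMU_RC6_RESIDENCY;
 *
 *		// i915 events are system-wide: pid == -1 and a CPU from the
 *		// PMU "cpumask" file (0 here for brevity).
 *		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *		if (fd < 0)
 *			return 1;
 *
 *		sleep(1);
 *		if (read(fd, &count, sizeof(count)) == sizeof(count))
 *			printf("rc6 residency: %llu ns\n", count);
 *
 *		return 0;
 *	}
 */
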
1054 void i915_pmu_unregister(struct drm_i915_private *i915)
1055 {
1056         if (!i915->pmu.base.event_init)
1057                 return;
1058
1059         WARN_ON(i915->pmu.enable);
1060
1061         hrtimer_cancel(&i915->pmu.timer);
1062
1063         i915_pmu_unregister_cpuhp_state(i915);
1064
1065         perf_pmu_unregister(&i915->pmu.base);
1066         i915->pmu.base.event_init = NULL;
1067         free_event_attributes(i915);
1068 }