]> Git Repo - J-linux.git/blob - tools/perf/util/stat-shadow.c
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux...
[J-linux.git] / tools / perf / util / stat-shadow.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include <math.h>
3 #include <stdio.h>
4 #include "evsel.h"
5 #include "stat.h"
6 #include "color.h"
7 #include "debug.h"
8 #include "pmu.h"
9 #include "rblist.h"
10 #include "evlist.h"
11 #include "expr.h"
12 #include "metricgroup.h"
13 #include "cgroup.h"
14 #include "units.h"
15 #include <linux/zalloc.h>
16 #include "iostat.h"
17 #include "util/hashmap.h"
18
19 struct stats walltime_nsecs_stats;
20 struct rusage_stats ru_stats;
21
/*
 * Bit flags describing an event's exclude_* modifiers.  evsel_context() ORs
 * these together so that two events can be compared for matching modifiers.
 */
enum {
	CTX_BIT_USER	= 0x01,
	CTX_BIT_KERNEL	= 0x02,
	CTX_BIT_HV	= 0x04,
	CTX_BIT_HOST	= 0x08,
	CTX_BIT_IDLE	= 0x10,
	CTX_BIT_MAX	= 0x20,	/* one bit above the highest flag */
};
30
/*
 * Classification of an event for shadow-stat purposes, as returned by
 * evsel__stat_type().  The values index the printer table in
 * perf_stat__print_shadow_stats(), so STAT_MAX must stay last.
 */
enum stat_type {
	/* Event has no dedicated shadow metric. */
	STAT_NONE = 0,

	/* Clock and generic hardware events. */
	STAT_NSECS,
	STAT_CYCLES,
	STAT_INSTRUCTIONS,
	STAT_STALLED_CYCLES_FRONT,
	STAT_STALLED_CYCLES_BACK,
	STAT_BRANCHES,
	STAT_BRANCH_MISS,
	STAT_CACHE_REFS,
	STAT_CACHE_MISSES,

	/* Hardware cache events: accesses (ratio denominators). */
	STAT_L1_DCACHE,
	STAT_L1_ICACHE,
	STAT_LL_CACHE,
	STAT_ITLB_CACHE,
	STAT_DTLB_CACHE,

	/* Hardware cache events: read misses (ratio numerators). */
	STAT_L1D_MISS,
	STAT_L1I_MISS,
	STAT_LL_MISS,
	STAT_DTLB_MISS,
	STAT_ITLB_MISS,

	/* Number of stat types; used to size lookup tables. */
	STAT_MAX
};
54
55 static int evsel_context(const struct evsel *evsel)
56 {
57         int ctx = 0;
58
59         if (evsel->core.attr.exclude_kernel)
60                 ctx |= CTX_BIT_KERNEL;
61         if (evsel->core.attr.exclude_user)
62                 ctx |= CTX_BIT_USER;
63         if (evsel->core.attr.exclude_hv)
64                 ctx |= CTX_BIT_HV;
65         if (evsel->core.attr.exclude_host)
66                 ctx |= CTX_BIT_HOST;
67         if (evsel->core.attr.exclude_idle)
68                 ctx |= CTX_BIT_IDLE;
69
70         return ctx;
71 }
72
73 void perf_stat__reset_shadow_stats(void)
74 {
75         memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
76         memset(&ru_stats, 0, sizeof(ru_stats));
77 }
78
79 static enum stat_type evsel__stat_type(const struct evsel *evsel)
80 {
81         /* Fake perf_hw_cache_op_id values for use with evsel__match. */
82         u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D |
83                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
84                 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
85         u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I |
86                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
87                 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
88         u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL |
89                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
90                 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
91         u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB |
92                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
93                 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
94         u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB |
95                 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
96                 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16);
97
98         if (evsel__is_clock(evsel))
99                 return STAT_NSECS;
100         else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
101                 return STAT_CYCLES;
102         else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS))
103                 return STAT_INSTRUCTIONS;
104         else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
105                 return STAT_STALLED_CYCLES_FRONT;
106         else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND))
107                 return STAT_STALLED_CYCLES_BACK;
108         else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS))
109                 return STAT_BRANCHES;
110         else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES))
111                 return STAT_BRANCH_MISS;
112         else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES))
113                 return STAT_CACHE_REFS;
114         else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES))
115                 return STAT_CACHE_MISSES;
116         else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D))
117                 return STAT_L1_DCACHE;
118         else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I))
119                 return STAT_L1_ICACHE;
120         else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL))
121                 return STAT_LL_CACHE;
122         else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB))
123                 return STAT_DTLB_CACHE;
124         else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB))
125                 return STAT_ITLB_CACHE;
126         else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss))
127                 return STAT_L1D_MISS;
128         else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss))
129                 return STAT_L1I_MISS;
130         else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss))
131                 return STAT_LL_MISS;
132         else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss))
133                 return STAT_DTLB_MISS;
134         else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss))
135                 return STAT_ITLB_MISS;
136         return STAT_NONE;
137 }
138
139 static const char *get_ratio_color(const double ratios[3], double val)
140 {
141         const char *color = PERF_COLOR_NORMAL;
142
143         if (val > ratios[0])
144                 color = PERF_COLOR_RED;
145         else if (val > ratios[1])
146                 color = PERF_COLOR_MAGENTA;
147         else if (val > ratios[2])
148                 color = PERF_COLOR_YELLOW;
149
150         return color;
151 }
152
153 static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type)
154 {
155         const struct evsel *cur;
156         int evsel_ctx = evsel_context(evsel);
157
158         evlist__for_each_entry(evsel->evlist, cur) {
159                 struct perf_stat_aggr *aggr;
160
161                 /* Ignore the evsel that is being searched from. */
162                 if (evsel == cur)
163                         continue;
164
165                 /* Ignore evsels that are part of different groups. */
166                 if (evsel->core.leader->nr_members > 1 &&
167                     evsel->core.leader != cur->core.leader)
168                         continue;
169                 /* Ignore evsels with mismatched modifiers. */
170                 if (evsel_ctx != evsel_context(cur))
171                         continue;
172                 /* Ignore if not the cgroup we're looking for. */
173                 if (evsel->cgrp != cur->cgrp)
174                         continue;
175                 /* Ignore if not the stat we're looking for. */
176                 if (type != evsel__stat_type(cur))
177                         continue;
178
179                 aggr = &cur->stats->aggr[aggr_idx];
180                 if (type == STAT_NSECS)
181                         return aggr->counts.val;
182                 return aggr->counts.val * cur->scale;
183         }
184         return 0.0;
185 }
186
187 static void print_ratio(struct perf_stat_config *config,
188                         const struct evsel *evsel, int aggr_idx,
189                         double numerator, struct perf_stat_output_ctx *out,
190                         enum stat_type denominator_type,
191                         const double color_ratios[3], const char *unit)
192 {
193         double denominator = find_stat(evsel, aggr_idx, denominator_type);
194
195         if (numerator && denominator) {
196                 double ratio = numerator / denominator * 100.0;
197                 const char *color = get_ratio_color(color_ratios, ratio);
198
199                 out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio);
200         } else
201                 out->print_metric(config, out->ctx, NULL, NULL, unit, 0);
202 }
203
204 static void print_stalled_cycles_front(struct perf_stat_config *config,
205                                 const struct evsel *evsel,
206                                 int aggr_idx, double stalled,
207                                 struct perf_stat_output_ctx *out)
208 {
209         static const double color_ratios[3] = {50.0, 30.0, 10.0};
210
211         print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
212                     "frontend cycles idle");
213 }
214
215 static void print_stalled_cycles_back(struct perf_stat_config *config,
216                                 const struct evsel *evsel,
217                                 int aggr_idx, double stalled,
218                                 struct perf_stat_output_ctx *out)
219 {
220         static const double color_ratios[3] = {75.0, 50.0, 20.0};
221
222         print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios,
223                     "backend cycles idle");
224 }
225
226 static void print_branch_miss(struct perf_stat_config *config,
227                         const struct evsel *evsel,
228                         int aggr_idx, double misses,
229                         struct perf_stat_output_ctx *out)
230 {
231         static const double color_ratios[3] = {20.0, 10.0, 5.0};
232
233         print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios,
234                     "of all branches");
235 }
236
237 static void print_l1d_miss(struct perf_stat_config *config,
238                         const struct evsel *evsel,
239                         int aggr_idx, double misses,
240                         struct perf_stat_output_ctx *out)
241 {
242         static const double color_ratios[3] = {20.0, 10.0, 5.0};
243
244         print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios,
245                     "of all L1-dcache accesses");
246 }
247
248 static void print_l1i_miss(struct perf_stat_config *config,
249                         const struct evsel *evsel,
250                         int aggr_idx, double misses,
251                         struct perf_stat_output_ctx *out)
252 {
253         static const double color_ratios[3] = {20.0, 10.0, 5.0};
254
255         print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios,
256                     "of all L1-icache accesses");
257 }
258
259 static void print_ll_miss(struct perf_stat_config *config,
260                         const struct evsel *evsel,
261                         int aggr_idx, double misses,
262                         struct perf_stat_output_ctx *out)
263 {
264         static const double color_ratios[3] = {20.0, 10.0, 5.0};
265
266         print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios,
267                     "of all L1-icache accesses");
268 }
269
270 static void print_dtlb_miss(struct perf_stat_config *config,
271                         const struct evsel *evsel,
272                         int aggr_idx, double misses,
273                         struct perf_stat_output_ctx *out)
274 {
275         static const double color_ratios[3] = {20.0, 10.0, 5.0};
276
277         print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios,
278                     "of all dTLB cache accesses");
279 }
280
281 static void print_itlb_miss(struct perf_stat_config *config,
282                         const struct evsel *evsel,
283                         int aggr_idx, double misses,
284                         struct perf_stat_output_ctx *out)
285 {
286         static const double color_ratios[3] = {20.0, 10.0, 5.0};
287
288         print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios,
289                     "of all iTLB cache accesses");
290 }
291
292 static void print_cache_miss(struct perf_stat_config *config,
293                         const struct evsel *evsel,
294                         int aggr_idx, double misses,
295                         struct perf_stat_output_ctx *out)
296 {
297         static const double color_ratios[3] = {20.0, 10.0, 5.0};
298
299         print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios,
300                     "of all cache refs");
301 }
302
303 static void print_instructions(struct perf_stat_config *config,
304                         const struct evsel *evsel,
305                         int aggr_idx, double instructions,
306                         struct perf_stat_output_ctx *out)
307 {
308         print_metric_t print_metric = out->print_metric;
309         void *ctxp = out->ctx;
310         double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES);
311         double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT),
312                                 find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK));
313
314         if (cycles) {
315                 print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle",
316                         instructions / cycles);
317         } else
318                 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
319
320         if (max_stalled && instructions) {
321                 out->new_line(config, ctxp);
322                 print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn",
323                         max_stalled / instructions);
324         }
325 }
326
327 static void print_cycles(struct perf_stat_config *config,
328                         const struct evsel *evsel,
329                         int aggr_idx, double cycles,
330                         struct perf_stat_output_ctx *out)
331 {
332         double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS);
333
334         if (cycles && nsecs) {
335                 double ratio = cycles / nsecs;
336
337                 out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio);
338         } else
339                 out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0);
340 }
341
342 static void print_nsecs(struct perf_stat_config *config,
343                         const struct evsel *evsel,
344                         int aggr_idx __maybe_unused, double nsecs,
345                         struct perf_stat_output_ctx *out)
346 {
347         print_metric_t print_metric = out->print_metric;
348         void *ctxp = out->ctx;
349         double wall_time = avg_stats(&walltime_nsecs_stats);
350
351         if (wall_time) {
352                 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
353                         nsecs / (wall_time * evsel->scale));
354         } else
355                 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
356 }
357
358 static int prepare_metric(struct evsel **metric_events,
359                           struct metric_ref *metric_refs,
360                           struct expr_parse_ctx *pctx,
361                           int aggr_idx)
362 {
363         int i;
364
365         for (i = 0; metric_events[i]; i++) {
366                 char *n;
367                 double val;
368                 int source_count = 0;
369
370                 if (evsel__is_tool(metric_events[i])) {
371                         struct stats *stats;
372                         double scale;
373
374                         switch (metric_events[i]->tool_event) {
375                         case PERF_TOOL_DURATION_TIME:
376                                 stats = &walltime_nsecs_stats;
377                                 scale = 1e-9;
378                                 break;
379                         case PERF_TOOL_USER_TIME:
380                                 stats = &ru_stats.ru_utime_usec_stat;
381                                 scale = 1e-6;
382                                 break;
383                         case PERF_TOOL_SYSTEM_TIME:
384                                 stats = &ru_stats.ru_stime_usec_stat;
385                                 scale = 1e-6;
386                                 break;
387                         case PERF_TOOL_NONE:
388                                 pr_err("Invalid tool event 'none'");
389                                 abort();
390                         case PERF_TOOL_MAX:
391                                 pr_err("Invalid tool event 'max'");
392                                 abort();
393                         default:
394                                 pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
395                                 abort();
396                         }
397                         val = avg_stats(stats) * scale;
398                         source_count = 1;
399                 } else {
400                         struct perf_stat_evsel *ps = metric_events[i]->stats;
401                         struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
402
403                         if (!aggr)
404                                 break;
405
406                         /*
407                          * If an event was scaled during stat gathering, reverse
408                          * the scale before computing the metric.
409                          */
410                         val = aggr->counts.val * (1.0 / metric_events[i]->scale);
411                         source_count = evsel__source_count(metric_events[i]);
412                 }
413                 n = strdup(evsel__metric_id(metric_events[i]));
414                 if (!n)
415                         return -ENOMEM;
416
417                 expr__add_id_val_source_count(pctx, n, val, source_count);
418         }
419
420         for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) {
421                 int ret = expr__add_ref(pctx, &metric_refs[j]);
422
423                 if (ret)
424                         return ret;
425         }
426
427         return i;
428 }
429
430 static void generic_metric(struct perf_stat_config *config,
431                            const char *metric_expr,
432                            const char *metric_threshold,
433                            struct evsel **metric_events,
434                            struct metric_ref *metric_refs,
435                            char *name,
436                            const char *metric_name,
437                            const char *metric_unit,
438                            int runtime,
439                            int aggr_idx,
440                            struct perf_stat_output_ctx *out)
441 {
442         print_metric_t print_metric = out->print_metric;
443         struct expr_parse_ctx *pctx;
444         double ratio, scale, threshold;
445         int i;
446         void *ctxp = out->ctx;
447         const char *color = NULL;
448
449         pctx = expr__ctx_new();
450         if (!pctx)
451                 return;
452
453         if (config->user_requested_cpu_list)
454                 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
455         pctx->sctx.runtime = runtime;
456         pctx->sctx.system_wide = config->system_wide;
457         i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
458         if (i < 0) {
459                 expr__ctx_free(pctx);
460                 return;
461         }
462         if (!metric_events[i]) {
463                 if (expr__parse(&ratio, pctx, metric_expr) == 0) {
464                         char *unit;
465                         char metric_bf[64];
466
467                         if (metric_threshold &&
468                             expr__parse(&threshold, pctx, metric_threshold) == 0 &&
469                             !isnan(threshold)) {
470                                 color = fpclassify(threshold) == FP_ZERO
471                                         ? PERF_COLOR_GREEN : PERF_COLOR_RED;
472                         }
473
474                         if (metric_unit && metric_name) {
475                                 if (perf_pmu__convert_scale(metric_unit,
476                                         &unit, &scale) >= 0) {
477                                         ratio *= scale;
478                                 }
479                                 if (strstr(metric_expr, "?"))
480                                         scnprintf(metric_bf, sizeof(metric_bf),
481                                           "%s  %s_%d", unit, metric_name, runtime);
482                                 else
483                                         scnprintf(metric_bf, sizeof(metric_bf),
484                                           "%s  %s", unit, metric_name);
485
486                                 print_metric(config, ctxp, color, "%8.1f",
487                                              metric_bf, ratio);
488                         } else {
489                                 print_metric(config, ctxp, color, "%8.2f",
490                                         metric_name ?
491                                         metric_name :
492                                         out->force_header ?  name : "",
493                                         ratio);
494                         }
495                 } else {
496                         print_metric(config, ctxp, color, /*unit=*/NULL,
497                                      out->force_header ?
498                                      (metric_name ? metric_name : name) : "", 0);
499                 }
500         } else {
501                 print_metric(config, ctxp, color, /*unit=*/NULL,
502                              out->force_header ?
503                              (metric_name ? metric_name : name) : "", 0);
504         }
505
506         expr__ctx_free(pctx);
507 }
508
509 double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
510 {
511         struct expr_parse_ctx *pctx;
512         double ratio = 0.0;
513
514         pctx = expr__ctx_new();
515         if (!pctx)
516                 return NAN;
517
518         if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
519                 goto out;
520
521         if (expr__parse(&ratio, pctx, mexp->metric_expr))
522                 ratio = 0.0;
523
524 out:
525         expr__ctx_free(pctx);
526         return ratio;
527 }
528
529 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
530                                    struct evsel *evsel,
531                                    double avg, int aggr_idx,
532                                    struct perf_stat_output_ctx *out,
533                                    struct rblist *metric_events)
534 {
535         typedef void (*stat_print_function_t)(struct perf_stat_config *config,
536                                         const struct evsel *evsel,
537                                         int aggr_idx, double misses,
538                                         struct perf_stat_output_ctx *out);
539         static const stat_print_function_t stat_print_function[STAT_MAX] = {
540                 [STAT_INSTRUCTIONS] = print_instructions,
541                 [STAT_BRANCH_MISS] = print_branch_miss,
542                 [STAT_L1D_MISS] = print_l1d_miss,
543                 [STAT_L1I_MISS] = print_l1i_miss,
544                 [STAT_DTLB_MISS] = print_dtlb_miss,
545                 [STAT_ITLB_MISS] = print_itlb_miss,
546                 [STAT_LL_MISS] = print_ll_miss,
547                 [STAT_CACHE_MISSES] = print_cache_miss,
548                 [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front,
549                 [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back,
550                 [STAT_CYCLES] = print_cycles,
551                 [STAT_NSECS] = print_nsecs,
552         };
553         print_metric_t print_metric = out->print_metric;
554         void *ctxp = out->ctx;
555         struct metric_event *me;
556         int num = 1;
557
558         if (config->iostat_run) {
559                 iostat_print_metric(config, evsel, out);
560         } else {
561                 stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)];
562
563                 if (fn)
564                         fn(config, evsel, aggr_idx, avg, out);
565                 else {
566                         double nsecs =  find_stat(evsel, aggr_idx, STAT_NSECS);
567
568                         if (nsecs) {
569                                 char unit = ' ';
570                                 char unit_buf[10] = "/sec";
571                                 double ratio = convert_unit_double(1000000000.0 * avg / nsecs,
572                                                                    &unit);
573
574                                 if (unit != ' ')
575                                         snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
576                                 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
577                         } else
578                                 num = 0;
579                 }
580         }
581
582         if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
583                 struct metric_expr *mexp;
584
585                 list_for_each_entry (mexp, &me->head, nd) {
586                         if (num++ > 0)
587                                 out->new_line(config, ctxp);
588                         generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
589                                        mexp->metric_events, mexp->metric_refs, evsel->name,
590                                        mexp->metric_name, mexp->metric_unit, mexp->runtime,
591                                        aggr_idx, out);
592                 }
593         }
594         if (num == 0)
595                 print_metric(config, ctxp, NULL, NULL, NULL, 0);
596 }
This page took 0.067357 seconds and 4 git commands to generate.