// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;
	u64				instructions_sample_period;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;

	u64				**metadata;
	u64				metadata_ver;
	u64				metadata_nr_cpu;
	bool				is_homogeneous;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
	u64				period_instructions;
	u32				flags;
};

struct data_source_handle {
	const struct midr_range *midr_ranges;
	void (*ds_synth)(const struct arm_spe_record *record,
			 union perf_mem_data_src *data_src);
};

#define DS(range, func)					\
	{						\
		.midr_ranges = range,			\
		.ds_synth = arm_spe__synth_##func,	\
	}
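
/*
 * For reference, DS(common_ds_encoding_cpus, data_source_common) expands to:
 *
 *	{
 *		.midr_ranges = common_ds_encoding_cpus,
 *		.ds_synth = arm_spe__synth_data_source_common,
 *	}
 *
 * matching the handlers registered in data_source_handles[] below.
 */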

static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

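/*
 * Decoder callback for fetching trace data. It steps through the AUX
 * buffers queued for this SPE queue; returning with b->len == 0 tells
 * the decoder that no more trace data is available.
 */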
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;
	speq->period_instructions = 0;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

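/*
 * Classify an instruction pointer as kernel or user purely by address:
 * anything at or above kernel_start is reported as kernel mode; guest
 * and hypervisor modes are not distinguished here.
 */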
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = thread__pid(speq->thread);
		if (queue->cpu == -1)
			speq->cpu = thread__cpu(speq->thread);
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
{
	u64 i;

	if (!spe->metadata)
		return NULL;

	for (i = 0; i < spe->metadata_nr_cpu; i++)
		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
			return spe->metadata[i];

	return NULL;
}

static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
{
	struct simd_flags simd_flags = {};

	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;

	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;

	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;

	return simd_flags;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;
	sample->simd_flags = arm_spe__synth_simd_flags(record);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
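
/*
 * Note arm_spe_prep_sample() sets sample->period to 1; the instruction
 * sample path below overrides this with the configured
 * instructions_sample_period.
 */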

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.weight = record->latency;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.weight = record->latency;
	sample.flags = speq->flags;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/*
	 * Handle the perf instruction sampling period: synthesize one
	 * sample per instructions_sample_period decoded records and skip
	 * the records in between.
	 */
	speq->period_instructions++;
	if (speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;
	sample.period = spe->instructions_sample_period;
	sample.weight = record->latency;
	sample.flags = speq->flags;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static const struct midr_range common_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
	{},
};

static const struct midr_range ampereone_ds_encoding_cpus[] = {
	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
	{},
};

static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;

	speq->flags = 0;
	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
		speq->flags = PERF_IP_FLAG_BRANCH;

		if (record->type & ARM_SPE_BRANCH_MISS)
			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
	}
}

static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
					      union perf_mem_data_src *data_src)
{
	/*
	 * Even though four levels of cache hierarchy are possible, no known
	 * production Neoverse systems currently include more than three
	 * levels, so for the time being we assume three exist. If a
	 * production system is built with four, this function would have to
	 * be changed to detect the number of levels for reporting.
	 */

	/*
	 * We have no data on the hit level or data source for stores in the
	 * Neoverse SPE records.
	 */
	if (record->op & ARM_SPE_OP_ST) {
		data_src->mem_lvl = PERF_MEM_LVL_NA;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
		return;
	}

	switch (record->source) {
	case ARM_SPE_COMMON_DS_L1D:
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_L2:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	case ARM_SPE_COMMON_DS_PEER_CORE:
		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * We don't know whether this hit in L1 or L2, but we do know it was
	 * a cache-to-cache transfer, so set SNOOPX_PEER.
	 */
	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	/*
	 * System cache is assumed to be L3
	 */
	case ARM_SPE_COMMON_DS_SYS_CACHE:
		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
		break;
	/*
	 * We don't know what level it hit in, except it came from the other
	 * socket
	 */
	case ARM_SPE_COMMON_DS_REMOTE:
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
		break;
	case ARM_SPE_COMMON_DS_DRAM:
		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
		break;
	default:
		break;
	}
}

/*
 * The source field is IMPDEF. Here we convert the source encoding used on
 * AmpereOne cores to the common (Neoverse, Cortex) encoding to avoid
 * duplicating the decoding code.
 */
static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
						 union perf_mem_data_src *data_src)
{
	struct arm_spe_record common_record;

	switch (record->source) {
	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
		break;
	case ARM_SPE_AMPEREONE_SLC:
		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
		break;
	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
		break;
	case ARM_SPE_AMPEREONE_DDR:
		common_record.source = ARM_SPE_COMMON_DS_DRAM;
		break;
	case ARM_SPE_AMPEREONE_L1D:
		common_record.source = ARM_SPE_COMMON_DS_L1D;
		break;
	case ARM_SPE_AMPEREONE_L2D:
		common_record.source = ARM_SPE_COMMON_DS_L2;
		break;
	default:
		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
				record->source);
		return;
	}

	common_record.op = record->op;
	arm_spe__synth_data_source_common(&common_record, data_src);
}

static const struct data_source_handle data_source_handles[] = {
	DS(common_ds_encoding_cpus, data_source_common),
	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
};

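/*
 * Fallback for CPUs without a MIDR-specific data source handler: derive
 * the memory level from the record's access/miss type bits alone.
 */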
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
					union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}

static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
			      const struct arm_spe_record *record,
			      union perf_mem_data_src *data_src)
{
	struct arm_spe *spe = speq->spe;
	u64 *metadata = NULL;
	u64 midr;
	unsigned int i;

	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
	if (spe->metadata_ver == 1) {
		const char *cpuid;

		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
		cpuid = perf_env__cpuid(spe->session->evlist->env);
		midr = strtol(cpuid, NULL, 16);
	} else {
		/* CPU ID is -1 for per-thread mode */
		if (speq->cpu < 0) {
			/*
			 * On a heterogeneous system, since the CPU ID is -1,
			 * we cannot confirm that the data source packet is
			 * supported.
			 */
			if (!spe->is_homogeneous)
				return false;

			/* On a homogeneous system, simply use CPU0's metadata */
			if (spe->metadata)
				metadata = spe->metadata[0];
		} else {
			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
		}

		if (!metadata)
			return false;

		midr = metadata[ARM_SPE_CPU_MIDR];
	}

	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
			data_source_handles[i].ds_synth(record, data_src);
			return true;
		}
	}

	return false;
}

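/*
 * Build a perf_mem_data_src encoding for the record. A return value of
 * zero means the record is not a load or store; callers use that to skip
 * synthesizing memory samples for non-memory operations.
 */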
static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
				      const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };

	if (record->op & ARM_SPE_OP_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op & ARM_SPE_OP_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (!arm_spe__synth_ds(speq, record, &data_src))
		arm_spe__synth_memory_level(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	arm_spe__sample_flags(speq);
	data_src = arm_spe__synth_data_source(speq, record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero the record is not a memory operation, so
	 * skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	if (spe->sample_instructions) {
		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is first to decode the packets and then
		 * synthesize a sample based on the record; but here the flow
		 * is reversed: arm_spe_sample() is called to synthesize
		 * samples prior to arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), it
		 * has already decoded trace data and generated a record, but
		 * that record is left unsampled until we get here, so it is
		 * correct to synthesize a sample for the leftover record.
		 * 2. After decoding trace data, the record timestamp must be
		 * compared with the timestamp of the coming perf event; if
		 * the record is later, we bail out and push the record onto
		 * the auxtrace heap, deferring sample synthesis until the
		 * next time we get here. This correlates samples between Arm
		 * SPE trace data and other perf events with correct time
		 * ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding SPE trace data,
		 * continue to the next trace data and find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

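		/*
		 * A negative return value from the decoder indicates an
		 * error in the packet stream; retry so a bad packet does
		 * not prevent the queue from being primed with its first
		 * record and timestamp.
		 */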
retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events; timeless decoding is only
	 * possible if none of them has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

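		/*
		 * Decode this queue only up to just past the next queue's
		 * starting timestamp (capped at @timestamp), so that samples
		 * from all queues stay in global time order.
		 */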
		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the machine's context, so
		 * here we need to update the pid/tid in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		    event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here, now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 const struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
{
	u64 *metadata;

	metadata = zalloc(per_cpu_size);
	if (!metadata)
		return NULL;

	memcpy(metadata, buf, per_cpu_size);
	return metadata;
}

static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
{
	int i;

	for (i = 0; i < nr_cpu; i++)
		zfree(&metadata[i]);
	free(metadata);
}

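/*
 * Metadata layout in the auxtrace info event, in u64 words (version >= 2;
 * the field indices match the dump formats above):
 *
 *   header:	[HEADER_VERSION][HEADER_SIZE][PMU_TYPE_V2][CPUS_NUM]...
 *   per CPU:	[MAGIC][CPU][NR_PARAMS][MIDR][PMU_TYPE][MIN_IVAL]...
 *
 * Version 1 carries only [PMU_TYPE][PER_CPU_MMAPS] and has no per-CPU
 * blocks, hence the early return below.
 */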
static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
				     u64 *ver, int *nr_cpu)
{
	u64 *ptr = (u64 *)info->priv;
	u64 metadata_size;
	u64 **metadata = NULL;
	int hdr_sz, per_cpu_sz, i;

	metadata_size = info->header.size -
		sizeof(struct perf_record_auxtrace_info);

	/* Metadata version 1 */
	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
		*ver = 1;
		*nr_cpu = 0;
		/* No per CPU metadata */
		return NULL;
	}

	*ver = ptr[ARM_SPE_HEADER_VERSION];
	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];

	metadata = calloc(*nr_cpu, sizeof(*metadata));
	if (!metadata)
		return NULL;

	/* Locate the start address of per CPU metadata */
	ptr += hdr_sz;
	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);

	for (i = 0; i < *nr_cpu; i++) {
		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
		if (!metadata[i])
			goto err_per_cpu_metadata;

		ptr += per_cpu_sz / sizeof(u64);
	}

	return metadata;

err_per_cpu_metadata:
	arm_spe__free_metadata(metadata, *nr_cpu);
	return NULL;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const metadata_hdr_v1_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
};

static const char * const metadata_hdr_fmts[] = {
	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
};

static const char * const metadata_per_cpu_fmts[] = {
	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
};

static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
{
	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
	const char * const *hdr_fmts;

	if (!dump_trace)
		return;

	if (spe->metadata_ver == 1) {
		cpu_num = 0;
		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
		hdr_fmts = metadata_hdr_v1_fmts;
	} else {
		cpu_num = arr[ARM_SPE_CPUS_NUM];
		hdr_size = arr[ARM_SPE_HEADER_SIZE];
		hdr_fmts = metadata_hdr_fmts;
	}

	for (i = 0; i < hdr_size; i++)
		fprintf(stdout, hdr_fmts[i], arr[i]);

	arr += hdr_size;
	for (cpu = 0; cpu < cpu_num; cpu++) {
		/*
		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
		 * are fixed. The sequential parameter size is decided by the
		 * field 'ARM_SPE_CPU_NR_PARAMS'.
		 */
		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
		for (i = 0; i < cpu_size; i++)
			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
		arr += cpu_size;
	}
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type &
				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

        if (spe->synth_opts.flc) {
                spe->sample_flc = true;

                /* Level 1 data cache miss */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->l1d_miss_id = id;
                arm_spe_set_event_name(evlist, id, "l1d-miss");
                id += 1;

                /* Level 1 data cache access */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->l1d_access_id = id;
                arm_spe_set_event_name(evlist, id, "l1d-access");
                id += 1;
        }

        if (spe->synth_opts.llc) {
                spe->sample_llc = true;

                /* Last level cache miss */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->llc_miss_id = id;
                arm_spe_set_event_name(evlist, id, "llc-miss");
                id += 1;

                /* Last level cache access */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->llc_access_id = id;
                arm_spe_set_event_name(evlist, id, "llc-access");
                id += 1;
        }

        if (spe->synth_opts.tlb) {
                spe->sample_tlb = true;

                /* TLB miss */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->tlb_miss_id = id;
                arm_spe_set_event_name(evlist, id, "tlb-miss");
                id += 1;

                /* TLB access */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->tlb_access_id = id;
                arm_spe_set_event_name(evlist, id, "tlb-access");
                id += 1;
        }

        if (spe->synth_opts.branches) {
                spe->sample_branch = true;

                /* Branch */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->branch_id = id;
                arm_spe_set_event_name(evlist, id, "branch");
                id += 1;
        }

        if (spe->synth_opts.remote_access) {
                spe->sample_remote_access = true;

                /* Remote access */
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->remote_access_id = id;
                arm_spe_set_event_name(evlist, id, "remote-access");
                id += 1;
        }

        if (spe->synth_opts.mem) {
                spe->sample_memory = true;

                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->memory_id = id;
                arm_spe_set_event_name(evlist, id, "memory");
                id += 1;
        }

        if (spe->synth_opts.instructions) {
                if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
                        pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
                        goto synth_instructions_out;
                }
                if (spe->synth_opts.period > 1)
                        pr_warning("Arm SPE has a hardware-based sample period.\n"
                                   "Additional instruction events will be discarded by --itrace.\n");

                spe->sample_instructions = true;
                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
                attr.sample_period = spe->synth_opts.period;
                spe->instructions_sample_period = attr.sample_period;
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;
                spe->instructions_id = id;
                arm_spe_set_event_name(evlist, id, "instructions");
        }
synth_instructions_out:

        return 0;
}
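
/*
 * Illustrative note for arm_spe_synth_events() above: each enabled
 * --itrace class registers its own synthetic attr/id pair(s).  For
 * example, "perf report --itrace=f" takes only the FLC branch, so two
 * events, "l1d-miss" and "l1d-access", are synthesized with consecutive
 * ids derived from the SPE evsel id.
 */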

static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
{
        u64 midr;
        int i;

        if (!nr_cpu)
                return false;

        for (i = 0; i < nr_cpu; i++) {
                if (!metadata[i])
                        return false;

                if (i == 0) {
                        midr = metadata[i][ARM_SPE_CPU_MIDR];
                        continue;
                }

                if (midr != metadata[i][ARM_SPE_CPU_MIDR])
                        return false;
        }

        return true;
}

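/*
 * The homogeneity check above matters because SPE data-source packet
 * encodings are IMPLEMENTATION DEFINED and are interpreted according to
 * the CPU's MIDR: on a homogeneous system a single MIDR describes every
 * record, while a heterogeneous system needs per-record CPU context.
 */

/*
 * Entry point for a PERF_RECORD_AUXTRACE_INFO event.  A typical flow
 * (illustrative; the PMU instance name varies, e.g. arm_spe_0):
 *
 *      perf record -e arm_spe_0// -- <workload>
 *      perf report --itrace=...
 *
 * The report step reaches this function to parse the SPE metadata and
 * register the decoder callbacks before any AUX data is processed.
 */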
int arm_spe_process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session)
{
        struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
        size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
        struct perf_record_time_conv *tc = &session->time_conv;
        struct arm_spe *spe;
        u64 **metadata = NULL;
        u64 metadata_ver;
        int nr_cpu, err;

        if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
                                        min_sz)
                return -EINVAL;

        metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
                                           &nr_cpu);
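        /*
         * Version 1 metadata carries no per-CPU records, so a NULL return
         * with metadata_ver == 1 is not an error.
         */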
        if (!metadata && metadata_ver != 1) {
                pr_err("Failed to parse Arm SPE metadata.\n");
                return -EINVAL;
        }

        spe = zalloc(sizeof(struct arm_spe));
        if (!spe) {
                err = -ENOMEM;
                goto err_free_metadata;
        }

        err = auxtrace_queues__init(&spe->queues);
        if (err)
                goto err_free;

        spe->session = session;
        spe->machine = &session->machines.host; /* No kvm support */
        spe->auxtrace_type = auxtrace_info->type;
        if (metadata_ver == 1)
                spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
        else
                spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
        spe->metadata = metadata;
        spe->metadata_ver = metadata_ver;
        spe->metadata_nr_cpu = nr_cpu;
        spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);

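        /*
         * "Timeless" decoding is used when no event records timestamps
         * (PERF_SAMPLE_TIME unset); queues are then drained in order at
         * flush time rather than interleaved by timestamp.
         */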
        spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

        /*
         * The synthesized event PERF_RECORD_TIME_CONV has already been
         * handled and the parameters for the hardware clock are stored in
         * the session context.  Pass these parameters to the struct
         * perf_tsc_conversion in "spe->tc", which is used later to convert
         * between the clock counter and timestamps.
         *
         * For backward compatibility, copy the fields starting from
         * "time_cycles" only if they are contained in the event.
         */
        spe->tc.time_shift = tc->time_shift;
        spe->tc.time_mult = tc->time_mult;
        spe->tc.time_zero = tc->time_zero;

        if (event_contains(*tc, time_cycles)) {
                spe->tc.time_cycles = tc->time_cycles;
                spe->tc.time_mask = tc->time_mask;
                spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
                spe->tc.cap_user_time_short = tc->cap_user_time_short;
        }
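
        /*
         * Sketch of the conversion later applied by tsc_to_perf_time()
         * (util/tsc.c), shown here for clarity; "cyc" is first wrapped via
         * time_cycles/time_mask when cap_user_time_short is set:
         *
         *      quot = cyc >> time_shift;
         *      rem  = cyc & (((u64)1 << time_shift) - 1);
         *      time = time_zero + quot * time_mult +
         *             ((rem * time_mult) >> time_shift);
         */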

        spe->auxtrace.process_event = arm_spe_process_event;
        spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
        spe->auxtrace.flush_events = arm_spe_flush;
        spe->auxtrace.free_events = arm_spe_free_events;
        spe->auxtrace.free = arm_spe_free;
        spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
        session->auxtrace = &spe->auxtrace;

        arm_spe_print_info(spe, &auxtrace_info->priv[0]);

        if (dump_trace)
                return 0;

        if (session->itrace_synth_opts && session->itrace_synth_opts->set)
                spe->synth_opts = *session->itrace_synth_opts;
        else
                itrace_synth_opts__set_default(&spe->synth_opts, false);

        err = arm_spe_synth_events(spe, session);
        if (err)
                goto err_free_queues;

        err = auxtrace_queues__process_index(&spe->queues, session);
        if (err)
                goto err_free_queues;

        if (spe->queues.populated)
                spe->data_queued = true;

        return 0;

err_free_queues:
        auxtrace_queues__free(&spe->queues);
        session->auxtrace = NULL;
err_free:
        free(spe);
err_free_metadata:
        arm_spe__free_metadata(metadata, nr_cpu);
        return err;
}