// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <[email protected]>
 * Author: Mathieu Poirier <[email protected]>
 */

#include <linux/kernel.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "dso.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "session.h"
#include "map_symbol.h"
#include "branch.h"
#include "symbol.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
#include "util/util.h"

struct cs_etm_auxtrace {
        struct auxtrace auxtrace;
        struct auxtrace_queues queues;
        struct auxtrace_heap heap;
        struct itrace_synth_opts synth_opts;
        struct perf_session *session;
        struct perf_tsc_conversion tc;

        /*
         * Timeless decoding has no timestamps in the trace, so overlapping mmap
         * lookups are less accurate, but it produces smaller trace data. We use
         * context IDs in the trace instead of matching timestamps with fork
         * records so they're not really needed in the general case. Overlapping
         * mmaps happen in cases like between a fork and an exec.
         */
        bool timeless_decoding;

        /*
         * Per-thread decoding ignores the trace channel ID and instead assumes
         * that everything in a buffer comes from the same process regardless of
         * which CPU it ran on. It also implies no context IDs so the TID is
         * taken from the auxtrace buffer.
         */
        bool per_thread_decoding;
        bool snapshot_mode;
        bool data_queued;
        bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

        int num_cpu;
        u64 latest_kernel_timestamp;
        u32 auxtrace_type;
        u64 branches_sample_type;
        u64 branches_id;
        u64 instructions_sample_type;
        u64 instructions_sample_period;
        u64 instructions_id;
        u64 **metadata;
        unsigned int pmu_type;
        enum cs_etm_pid_fmt pid_fmt;
};

struct cs_etm_traceid_queue {
        u8 trace_chan_id;
        u64 period_instructions;
        size_t last_branch_pos;
        union perf_event *event_buf;
        struct thread *thread;
        struct thread *prev_packet_thread;
        ocsd_ex_level prev_packet_el;
        ocsd_ex_level el;
        struct branch_stack *last_branch;
        struct branch_stack *last_branch_rb;
        struct cs_etm_packet *prev_packet;
        struct cs_etm_packet *packet;
        struct cs_etm_packet_queue packet_queue;
};

enum cs_etm_format {
        UNSET,
        FORMATTED,
        UNFORMATTED
};

struct cs_etm_queue {
        struct cs_etm_auxtrace *etm;
        struct cs_etm_decoder *decoder;
        struct auxtrace_buffer *buffer;
        unsigned int queue_nr;
        u8 pending_timestamp_chan_id;
        enum cs_etm_format format;
        u64 offset;
        const unsigned char *buf;
        size_t buf_len, buf_used;
        /* Conversion between traceID and index in traceid_queues array */
        struct intlist *traceid_queues_list;
        struct cs_etm_traceid_queue **traceid_queues;
        /* Conversion between traceID and metadata pointers */
        struct intlist *traceid_list;
        /*
         * Same as traceid_list, but traceid_list may be a reference to another
         * queue's list when that queue has a matching sink ID.
         */
        struct intlist *own_traceid_list;
        u32 sink_id;
};

static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
                                           pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);

/* PTM's ETMIDR[11:8] is set to 0b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bits and the channel as
 * the lower 16 bits.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
                      (queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
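/*
 * Worked example (illustrative, not from the original source): queue_nr = 2
 * and trace_chan_id = 0x10 give TO_CS_QUEUE_NR() = 0x20010, from which
 * TO_QUEUE_NR() recovers 0x2 and TO_TRACE_CHAN_ID() recovers 0x10.
 */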
#define SINK_UNSET ((u32) -1)

static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
        etmidr &= ETMIDR_PTM_VERSION;

        if (etmidr == ETMIDR_PTM_VERSION)
                return CS_ETM_PROTO_PTM;

        return CS_ETM_PROTO_ETMV3;
}
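/*
 * For example (illustrative value): etmidr = 0x4100f310 has bits [11:8] set
 * to 0b0011, so cs_etm__get_v7_protocol_version() returns CS_ETM_PROTO_PTM;
 * any other version field yields CS_ETM_PROTO_ETMV3.
 */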

static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
        struct int_node *inode;
        u64 *metadata;

        inode = intlist__find(etmq->traceid_list, trace_chan_id);
        if (!inode)
                return -EINVAL;

        metadata = inode->priv;
        *magic = metadata[CS_ETM_MAGIC];
        return 0;
}

int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
        struct int_node *inode;
        u64 *metadata;

        inode = intlist__find(etmq->traceid_list, trace_chan_id);
        if (!inode)
                return -EINVAL;

        metadata = inode->priv;
        *cpu = (int)metadata[CS_ETM_CPU];
        return 0;
}

/*
 * The returned PID format is presented as an enum:
 *
 *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
 *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
 *   CS_ETM_PIDFMT_NONE: No context IDs
 *
 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
 * are enabled at the same time when the session runs on an EL2 kernel.
 * This means CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will both be
 * recorded in the trace data; the tool will selectively use
 * CONTEXTIDR_EL2 as the PID.
 *
 * The result is cached in etm->pid_fmt so this function only needs to be called
 * when processing the aux info.
 */
static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
        u64 val;

        if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
                val = metadata[CS_ETM_ETMCR];
                /* CONTEXTIDR is traced */
                if (val & BIT(ETM_OPT_CTXTID))
                        return CS_ETM_PIDFMT_CTXTID;
        } else {
                val = metadata[CS_ETMV4_TRCCONFIGR];
                /* CONTEXTIDR_EL2 is traced */
                if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
                        return CS_ETM_PIDFMT_CTXTID2;
                /* CONTEXTIDR_EL1 is traced */
                else if (val & BIT(ETM4_CFG_BIT_CTXTID))
                        return CS_ETM_PIDFMT_CTXTID;
        }

        return CS_ETM_PIDFMT_NONE;
}
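/*
 * Decision summary for the ETMv4/ETE branch above, for illustration: VMID
 * tracing enabled in TRCCONFIGR selects CONTEXTIDR_EL2
 * (CS_ETM_PIDFMT_CTXTID2); otherwise context ID tracing selects
 * CONTEXTIDR_EL1 (CS_ETM_PIDFMT_CTXTID); with neither bit set there are no
 * context IDs (CS_ETM_PIDFMT_NONE).
 */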

enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
{
        return etmq->etm->pid_fmt;
}

static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
                                        u8 trace_chan_id, u64 *cpu_metadata)
{
        /* Get an RB node for this CPU */
        struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);

        /* Something went wrong, no need to continue */
        if (!inode)
                return -ENOMEM;

        /* Disallow re-mapping a traceID to a different metadata pair. */
        if (inode->priv) {
                u64 *curr_cpu_data = inode->priv;
                u8 curr_chan_id;
                int err;

                if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
                        /*
                         * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
                         * are expected (but not supported) in per-thread mode,
                         * rather than signifying an error.
                         */
                        if (etmq->etm->per_thread_decoding)
                                pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
                        else
                                pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");

                        return -EINVAL;
                }

                /* Check that the mapped ID matches */
                err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
                if (err)
                        return err;

                if (curr_chan_id != trace_chan_id) {
                        pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
                        return -EINVAL;
                }

                /* Skip re-adding the same mappings if everything matched */
                return 0;
        }

        /* Not one we've seen before, associate the traceID with the metadata pointer */
        inode->priv = cpu_metadata;

        return 0;
}

static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
{
        if (etm->per_thread_decoding)
                return etm->queues.queue_array[0].priv;
        else
                return etm->queues.queue_array[cpu].priv;
}

static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
                                   u64 *cpu_metadata)
{
        struct cs_etm_queue *etmq;

        /*
         * If the queue is unformatted then only save one mapping in the
         * queue associated with that CPU so only one decoder is made.
         */
        etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
        if (etmq->format == UNFORMATTED)
                return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
                                                    cpu_metadata);

        /*
         * Otherwise, version 0 trace IDs are global so save them into every
         * queue.
         */
        for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
                int ret;

                etmq = etm->queues.queue_array[i].priv;
                ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
                                                   cpu_metadata);
                if (ret)
                        return ret;
        }

        return 0;
}

static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
                                       u64 hw_id)
{
        int err;
        u64 *cpu_data;
        u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

        cpu_data = get_cpu_data(etm, cpu);
        if (cpu_data == NULL)
                return -EINVAL;

        err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
        if (err)
                return err;

        /*
         * If we are picking up the association from the packet, we need to plug
         * the correct trace ID into the metadata for setting up decoders later.
         */
        return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
}

static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
                                         u64 hw_id)
{
        struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
        int ret;
        u64 *cpu_data;
        u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
        u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

        /*
         * Check the sink ID hasn't changed in per-cpu mode. In per-thread mode,
         * let it pass for now until an actual overlapping trace ID is hit. In
         * most cases IDs won't overlap even if the sink changes.
         */
        if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
            etmq->sink_id != sink_id) {
                pr_err("CS_ETM: mismatch between sink IDs\n");
                return -EINVAL;
        }

        etmq->sink_id = sink_id;

        /* Find which other queues use this sink and link their ID maps */
        for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
                struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;

                /* Different sinks, skip */
                if (other_etmq->sink_id != etmq->sink_id)
                        continue;

                /* Already linked, skip */
                if (other_etmq->traceid_list == etmq->traceid_list)
                        continue;

                /* At the point of first linking, this one should be empty */
                if (!intlist__empty(etmq->traceid_list)) {
                        pr_err("CS_ETM: Can't link populated trace ID lists\n");
                        return -EINVAL;
                }

                etmq->own_traceid_list = NULL;
                intlist__delete(etmq->traceid_list);
                etmq->traceid_list = other_etmq->traceid_list;
                break;
        }

        cpu_data = get_cpu_data(etm, cpu);
        ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
        if (ret)
                return ret;

        ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
        if (ret)
                return ret;

        return 0;
}

static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
        u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

        switch (cs_etm_magic) {
        case __perf_cs_etmv3_magic:
                *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
                                      CORESIGHT_TRACE_ID_VAL_MASK);
                break;
        case __perf_cs_etmv4_magic:
        case __perf_cs_ete_magic:
                *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
                                      CORESIGHT_TRACE_ID_VAL_MASK);
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

/*
 * Update the metadata trace ID from the value found in the
 * PERF_RECORD_AUX_OUTPUT_HW_ID packet.
 */
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
        u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

        switch (cs_etm_magic) {
        case __perf_cs_etmv3_magic:
                cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
                break;
        case __perf_cs_etmv4_magic:
        case __perf_cs_ete_magic:
                cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

/*
 * Get a metadata index for a specific CPU from the array.
 */
static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
{
        int i;

        for (i = 0; i < etm->num_cpu; i++) {
                if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu)
                        return i;
        }

        return -1;
}

/*
 * Get the metadata for a specific CPU from the array.
 */
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
{
        int idx = get_cpu_data_idx(etm, cpu);

        return (idx != -1) ? etm->metadata[idx] : NULL;
}

/*
 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
 *
 * The payload associates the Trace ID and the CPU.
 * The routine is tolerant of seeing multiple packets with the same association,
 * but a CPU / Trace ID association changing during a session is an error.
 */
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
                                            union perf_event *event)
{
        struct cs_etm_auxtrace *etm;
        struct perf_sample sample;
        struct evsel *evsel;
        u64 hw_id;
        int cpu, version, err;

        /* Extract and parse the HW ID */
        hw_id = event->aux_output_hw_id.hw_id;
        version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);

        /* Check that we can handle this version */
        if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
                pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
                       version);
                return -EINVAL;
        }

        /* Get access to the etm metadata */
        etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
        if (!etm || !etm->metadata)
                return -EINVAL;

        /* Parse the sample to get the CPU */
        evsel = evlist__event2evsel(session->evlist, event);
        if (!evsel)
                return -EINVAL;
        err = evsel__parse_sample(evsel, event, &sample);
        if (err)
                return err;
        cpu = sample.cpu;
        if (cpu == -1) {
                /* No CPU in the sample - possibly recorded with an old version of perf */
                pr_err("CS_ETM: no CPU in AUX_OUTPUT_HW_ID sample. Use a compatible perf to record.\n");
                return -EINVAL;
        }

        if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
                return cs_etm__process_trace_id_v0(etm, cpu, hw_id);

        return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
}
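/*
 * Illustrative sketch of the hw_id payload consumed above (field positions
 * are given by the CS_AUX_HW_ID_*_MASK definitions): a major/minor version
 * pair plus a trace ID, with minor version 1 additionally carrying a sink ID.
 * Minor version 0 takes the cs_etm__process_trace_id_v0() path; anything
 * newer takes cs_etm__process_trace_id_v0_1().
 */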

void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
                                              u8 trace_chan_id)
{
        /*
         * When a timestamp packet is encountered the backend code
         * is stopped so that the front end has time to process packets
         * that were accumulated in the traceID queue.  Since there can
         * be more than one channel per cs_etm_queue, we need to specify
         * which traceID queue needs servicing.
         */
        etmq->pending_timestamp_chan_id = trace_chan_id;
}

static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
                                      u8 *trace_chan_id)
{
        struct cs_etm_packet_queue *packet_queue;

        if (!etmq->pending_timestamp_chan_id)
                return 0;

        if (trace_chan_id)
                *trace_chan_id = etmq->pending_timestamp_chan_id;

        packet_queue = cs_etm__etmq_get_packet_queue(etmq,
                                                     etmq->pending_timestamp_chan_id);
        if (!packet_queue)
                return 0;

        /* Acknowledge pending status */
        etmq->pending_timestamp_chan_id = 0;

        /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
        return packet_queue->cs_timestamp;
}

static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
{
        int i;

        queue->head = 0;
        queue->tail = 0;
        queue->packet_count = 0;
        for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
                queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
                queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
                queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
                queue->packet_buffer[i].instr_count = 0;
                queue->packet_buffer[i].last_instr_taken_branch = false;
                queue->packet_buffer[i].last_instr_size = 0;
                queue->packet_buffer[i].last_instr_type = 0;
                queue->packet_buffer[i].last_instr_subtype = 0;
                queue->packet_buffer[i].last_instr_cond = 0;
                queue->packet_buffer[i].flags = 0;
                queue->packet_buffer[i].exception_number = UINT32_MAX;
                queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
                queue->packet_buffer[i].cpu = INT_MIN;
        }
}

static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
{
        int idx;
        struct int_node *inode;
        struct cs_etm_traceid_queue *tidq;
        struct intlist *traceid_queues_list = etmq->traceid_queues_list;

        intlist__for_each_entry(inode, traceid_queues_list) {
                idx = (int)(intptr_t)inode->priv;
                tidq = etmq->traceid_queues[idx];
                cs_etm__clear_packet_queue(&tidq->packet_queue);
        }
}

static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
                                      struct cs_etm_traceid_queue *tidq,
                                      u8 trace_chan_id)
{
        int rc = -ENOMEM;
        struct auxtrace_queue *queue;
        struct cs_etm_auxtrace *etm = etmq->etm;

        cs_etm__clear_packet_queue(&tidq->packet_queue);

        queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
        tidq->trace_chan_id = trace_chan_id;
        tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
        tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
                                               queue->tid);
        tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

        tidq->packet = zalloc(sizeof(struct cs_etm_packet));
        if (!tidq->packet)
                goto out;

        tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
        if (!tidq->prev_packet)
                goto out_free;

        if (etm->synth_opts.last_branch) {
                size_t sz = sizeof(struct branch_stack);

                sz += etm->synth_opts.last_branch_sz *
                      sizeof(struct branch_entry);
                tidq->last_branch = zalloc(sz);
                if (!tidq->last_branch)
                        goto out_free;
                tidq->last_branch_rb = zalloc(sz);
                if (!tidq->last_branch_rb)
                        goto out_free;
        }

        tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
        if (!tidq->event_buf)
                goto out_free;

        return 0;

out_free:
        zfree(&tidq->last_branch_rb);
        zfree(&tidq->last_branch);
        zfree(&tidq->prev_packet);
        zfree(&tidq->packet);
out:
        return rc;
}

static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
        int idx;
        struct int_node *inode;
        struct intlist *traceid_queues_list;
        struct cs_etm_traceid_queue *tidq, **traceid_queues;
        struct cs_etm_auxtrace *etm = etmq->etm;

        if (etm->per_thread_decoding)
                trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

        traceid_queues_list = etmq->traceid_queues_list;

        /*
         * Check if a traceid_queue exists for this traceID by looking
         * in the queue list.
         */
        inode = intlist__find(traceid_queues_list, trace_chan_id);
        if (inode) {
                idx = (int)(intptr_t)inode->priv;
                return etmq->traceid_queues[idx];
        }

        /* We couldn't find a traceid_queue for this traceID, allocate one */
        tidq = malloc(sizeof(*tidq));
        if (!tidq)
                return NULL;

        memset(tidq, 0, sizeof(*tidq));

        /* Get a valid index for the new traceid_queue */
        idx = intlist__nr_entries(traceid_queues_list);
        /* Memory for the inode is freed in cs_etm__free_traceid_queues() */
        inode = intlist__findnew(traceid_queues_list, trace_chan_id);
        if (!inode)
                goto out_free;

        /* Associate this traceID with this index */
        inode->priv = (void *)(intptr_t)idx;

        if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
                goto out_free;

        /* Grow the traceid_queues array by one unit */
        traceid_queues = etmq->traceid_queues;
        traceid_queues = reallocarray(traceid_queues,
                                      idx + 1,
                                      sizeof(*traceid_queues));

        /*
         * On failure reallocarray() returns NULL and the original block of
         * memory is left untouched.
         */
        if (!traceid_queues)
                goto out_free;

        traceid_queues[idx] = tidq;
        etmq->traceid_queues = traceid_queues;

        return etmq->traceid_queues[idx];

out_free:
        /*
         * Function intlist__remove() removes the inode from the list
         * and deletes the memory associated with it.
         */
        intlist__remove(traceid_queues_list, inode);
        free(tidq);

        return NULL;
}

struct cs_etm_packet_queue
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
        struct cs_etm_traceid_queue *tidq;

        tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
        if (tidq)
                return &tidq->packet_queue;

        return NULL;
}

static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
                                struct cs_etm_traceid_queue *tidq)
{
        struct cs_etm_packet *tmp;

        if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
            etm->synth_opts.instructions) {
                /*
                 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
                 * the next incoming packet.
                 *
                 * Threads and exception levels are also tracked for both the
                 * previous and current packets. This is because the previous
                 * packet is used for the 'from' IP for branch samples, so the
                 * thread at that time must also be assigned to that sample.
                 * Across discontinuity packets the thread can change, so by
                 * tracking the thread for the previous packet the branch sample
                 * will have the correct info.
                 */
                tmp = tidq->packet;
                tidq->packet = tidq->prev_packet;
                tidq->prev_packet = tmp;
                tidq->prev_packet_el = tidq->el;
                thread__put(tidq->prev_packet_thread);
                tidq->prev_packet_thread = thread__get(tidq->thread);
        }
}

static void cs_etm__packet_dump(const char *pkt_string, void *data)
{
        const char *color = PERF_COLOR_BLUE;
        int len = strlen(pkt_string);
        struct cs_etm_queue *etmq = data;
        char queue_nr[64];

        if (verbose)
                snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr);
        else
                queue_nr[0] = '\0';

        if (len && (pkt_string[len-1] == '\n'))
                color_fprintf(stdout, color, "  %s%s", queue_nr, pkt_string);
        else
                color_fprintf(stdout, color, "  %s%s\n", queue_nr, pkt_string);

        fflush(stdout);
}

static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
                                          u64 *metadata, u32 etmidr)
{
        t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
        t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
        t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}

static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
                                          u64 *metadata)
{
        t_params->protocol = CS_ETM_PROTO_ETMV4i;
        t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
        t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
        t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
        t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
        t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
        t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}

static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
                                        u64 *metadata)
{
        t_params->protocol = CS_ETM_PROTO_ETE;
        t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
        t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
        t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
        t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
        t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
        t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
        t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
                                     struct cs_etm_queue *etmq)
{
        struct int_node *inode;

        intlist__for_each_entry(inode, etmq->traceid_list) {
                u64 *metadata = inode->priv;
                u64 architecture = metadata[CS_ETM_MAGIC];
                u32 etmidr;

                switch (architecture) {
                case __perf_cs_etmv3_magic:
                        etmidr = metadata[CS_ETM_ETMIDR];
                        cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
                        break;
                case __perf_cs_etmv4_magic:
                        cs_etm__set_trace_param_etmv4(t_params++, metadata);
                        break;
                case __perf_cs_ete_magic:
                        cs_etm__set_trace_param_ete(t_params++, metadata);
                        break;
                default:
                        return -EINVAL;
                }
        }

        return 0;
}

static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
                                       struct cs_etm_queue *etmq,
                                       enum cs_etm_decoder_operation mode)
{
        int ret = -EINVAL;

        if (mode >= CS_ETM_OPERATION_MAX)
                goto out;

        d_params->packet_printer = cs_etm__packet_dump;
        d_params->operation = mode;
        d_params->data = etmq;
        d_params->formatted = etmq->format == FORMATTED;
        d_params->fsyncs = false;
        d_params->hsyncs = false;
        d_params->frame_aligned = true;

        ret = 0;
out:
        return ret;
}

static void cs_etm__dump_event(struct cs_etm_queue *etmq,
                               struct auxtrace_buffer *buffer)
{
        int ret;
        const char *color = PERF_COLOR_BLUE;
        size_t buffer_used = 0;

        fprintf(stdout, "\n");
        color_fprintf(stdout, color,
                     ". ... CoreSight %s Trace data: size %#zx bytes\n",
                     cs_etm_decoder__get_name(etmq->decoder), buffer->size);

        do {
                size_t consumed;

                ret = cs_etm_decoder__process_data_block(
                                etmq->decoder, buffer->offset,
                                &((u8 *)buffer->data)[buffer_used],
                                buffer->size - buffer_used, &consumed);
                if (ret)
                        break;

                buffer_used += consumed;
        } while (buffer_used < buffer->size);

        cs_etm_decoder__reset(etmq->decoder);
}

static int cs_etm__flush_events(struct perf_session *session,
                                const struct perf_tool *tool)
{
        struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);
        if (dump_trace)
                return 0;

        if (!tool->ordered_events)
                return -EINVAL;

        if (etm->timeless_decoding) {
                /*
                 * Pass tid = -1 to process all queues. But likely they will have
                 * already been processed on PERF_RECORD_EXIT anyway.
                 */
                return cs_etm__process_timeless_queues(etm, -1);
        }

        return cs_etm__process_timestamped_queues(etm);
}

static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
        int idx;
        uintptr_t priv;
        struct int_node *inode, *tmp;
        struct cs_etm_traceid_queue *tidq;
        struct intlist *traceid_queues_list = etmq->traceid_queues_list;

        intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
                priv = (uintptr_t)inode->priv;
                idx = priv;

                /* Free this traceid_queue from the array */
                tidq = etmq->traceid_queues[idx];
                thread__zput(tidq->thread);
                thread__zput(tidq->prev_packet_thread);
                zfree(&tidq->event_buf);
                zfree(&tidq->last_branch);
                zfree(&tidq->last_branch_rb);
                zfree(&tidq->prev_packet);
                zfree(&tidq->packet);
                zfree(&tidq);

                /*
                 * Function intlist__remove() removes the inode from the list
                 * and deletes the memory associated with it.
                 */
                intlist__remove(traceid_queues_list, inode);
        }

        /* Then the RB tree itself */
        intlist__delete(traceid_queues_list);
        etmq->traceid_queues_list = NULL;

        /* Finally free the traceid_queues array */
        zfree(&etmq->traceid_queues);
}

static void cs_etm__free_queue(void *priv)
{
        struct int_node *inode, *tmp;
        struct cs_etm_queue *etmq = priv;

        if (!etmq)
                return;

        cs_etm_decoder__free(etmq->decoder);
        cs_etm__free_traceid_queues(etmq);

        if (etmq->own_traceid_list) {
                /* First remove all traceID/metadata nodes from the RB tree */
                intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
                        intlist__remove(etmq->own_traceid_list, inode);

                /* Then the RB tree itself */
                intlist__delete(etmq->own_traceid_list);
        }

        free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
        unsigned int i;
        struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);
        struct auxtrace_queues *queues = &aux->queues;

        for (i = 0; i < queues->nr_queues; i++) {
                cs_etm__free_queue(queues->queue_array[i].priv);
                queues->queue_array[i].priv = NULL;
        }

        auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
        int i;
        struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);
        cs_etm__free_events(session);
        session->auxtrace = NULL;

        for (i = 0; i < aux->num_cpu; i++)
                zfree(&aux->metadata[i]);

        zfree(&aux->metadata);
        zfree(&aux);
}

static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
                                      struct evsel *evsel)
{
        struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
                                                   struct cs_etm_auxtrace,
                                                   auxtrace);

        return evsel->core.attr.type == aux->pmu_type;
}

static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
                                           ocsd_ex_level el)
{
        enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

        /*
         * For any virtualisation based on nVHE (e.g. pKVM), or for host kernels
         * running at EL1, assume everything is the host.
         */
        if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
                return &etmq->etm->session->machines.host;

        /*
         * Not perfect, but otherwise assume anything in EL1 is the default
         * guest, and everything else is the host. Distinguishing between guest
         * and host userspaces isn't currently supported either. Neither is
         * multiple guest support. All this does is reduce the likelihood of
         * decode errors where we look into the host kernel maps when it should
         * have been the guest maps.
         */
        switch (el) {
        case ocsd_EL1:
                return machines__find_guest(&etmq->etm->session->machines,
                                            DEFAULT_GUEST_KERNEL_ID);
        case ocsd_EL3:
        case ocsd_EL2:
        case ocsd_EL0:
        case ocsd_EL_unknown:
        default:
                return &etmq->etm->session->machines.host;
        }
}

static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
                           ocsd_ex_level el)
{
        struct machine *machine = cs_etm__get_machine(etmq, el);

        if (address >= machine__kernel_start(machine)) {
                if (machine__is_host(machine))
                        return PERF_RECORD_MISC_KERNEL;
                else
                        return PERF_RECORD_MISC_GUEST_KERNEL;
        } else {
                if (machine__is_host(machine))
                        return PERF_RECORD_MISC_USER;
                else {
                        /*
                         * Can't really happen at the moment because
                         * cs_etm__get_machine() will always return
                         * machines.host for any non EL1 trace.
                         */
                        return PERF_RECORD_MISC_GUEST_USER;
                }
        }
}

static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
                              u64 address, size_t size, u8 *buffer,
                              const ocsd_mem_space_acc_t mem_space)
{
        u8  cpumode;
        u64 offset;
        int len;
        struct addr_location al;
        struct dso *dso;
        struct cs_etm_traceid_queue *tidq;
        int ret = 0;

        if (!etmq)
                return 0;

        addr_location__init(&al);
        tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
        if (!tidq)
                goto out;

        /*
         * We've already tracked the EL alongside the PID in cs_etm__set_thread()
         * so double check that it matches what OpenCSD thinks as well. It
         * doesn't distinguish between EL0 and EL1 for this mem access callback
         * so we had to do the extra tracking. Skip validation if it's any of
         * the 'any' values.
         */
        if (!(mem_space == OCSD_MEM_SPACE_ANY ||
              mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
                if (mem_space & OCSD_MEM_SPACE_EL1N) {
                        /* Includes both non-secure EL1 and EL0 */
                        assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
                } else if (mem_space & OCSD_MEM_SPACE_EL2)
                        assert(tidq->el == ocsd_EL2);
                else if (mem_space & OCSD_MEM_SPACE_EL3)
                        assert(tidq->el == ocsd_EL3);
        }

        cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

        if (!thread__find_map(tidq->thread, cpumode, address, &al))
                goto out;

        dso = map__dso(al.map);
        if (!dso)
                goto out;

        if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
            dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
                goto out;

        offset = map__map_ip(al.map, address);

        map__load(al.map);

        len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
                                    offset, buffer, size);

        if (len <= 0) {
                ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
                                 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
                if (!dso__auxtrace_warned(dso)) {
                        pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
                                address,
                                dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
                        dso__set_auxtrace_warned(dso);
                }
                goto out;
        }
        ret = len;
out:
        addr_location__exit(&al);
        return ret;
}

static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
        struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));

        if (!etmq)
                return NULL;

        etmq->traceid_queues_list = intlist__new(NULL);
        if (!etmq->traceid_queues_list)
                goto out_free;

        /*
         * Create an RB tree for the traceID-metadata tuples.  Since the
         * conversion has to be made for each packet that gets decoded,
         * optimizing access with something better than a sequential array is
         * worth doing.
         */
        etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
        if (!etmq->traceid_list)
                goto out_free;

        return etmq;

out_free:
        intlist__delete(etmq->traceid_queues_list);
        free(etmq);

        return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
                               struct auxtrace_queue *queue,
                               unsigned int queue_nr)
{
        struct cs_etm_queue *etmq = queue->priv;

        if (etmq)
                return 0;

        etmq = cs_etm__alloc_queue();

        if (!etmq)
                return -ENOMEM;

        queue->priv = etmq;
        etmq->etm = etm;
        etmq->queue_nr = queue_nr;
        queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
        etmq->offset = 0;
        etmq->sink_id = SINK_UNSET;

        return 0;
}

static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
                                            struct cs_etm_queue *etmq,
                                            unsigned int queue_nr)
{
        int ret = 0;
        unsigned int cs_queue_nr;
        u8 trace_chan_id;
        u64 cs_timestamp;

        /*
         * We are under a CPU-wide trace scenario.  As such we need to know
         * when the code that generated the traces started to execute so that
         * it can be correlated with execution on other CPUs.  So we get a
         * handle on the beginning of traces and decode until we find a
         * timestamp.  The timestamp is then added to the auxtrace min heap
         * in order to know which queue (of all the etmqs) to decode first.
         */
        while (1) {
                /*
                 * Fetch an aux_buffer from this etmq.  Bail if no more
                 * blocks or an error has been encountered.
                 */
                ret = cs_etm__get_data_block(etmq);
                if (ret <= 0)
                        goto out;

                /*
                 * Run decoder on the trace block.  The decoder will stop when
                 * encountering a CS timestamp, a full packet queue or the end of
                 * trace for that block.
                 */
                ret = cs_etm__decode_data_block(etmq);
                if (ret)
                        goto out;

                /*
                 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
                 * the timestamp calculation for us.
                 */
                cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

                /* We found a timestamp, no need to continue. */
                if (cs_timestamp)
                        break;

                /*
                 * We didn't find a timestamp so empty all the traceid packet
                 * queues before looking for another timestamp packet, either
                 * in the current data block or a new one.  Packets that were
                 * just decoded are useless since no timestamp has been
                 * associated with them.  As such simply discard them.
                 */
                cs_etm__clear_all_packet_queues(etmq);
        }

        /*
         * We have a timestamp.  Add it to the min heap to reflect when
         * instructions conveyed by the range packets of this traceID queue
         * started to execute.  Once the same has been done for all the traceID
         * queues of each etmq, rendering and decoding can start in
         * chronological order.
         *
         * Note that packets decoded above are still in the traceID's packet
         * queue and will be processed in cs_etm__process_timestamped_queues().
         */
        cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
        ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
        return ret;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
                                 struct cs_etm_traceid_queue *tidq)
{
        struct branch_stack *bs_src = tidq->last_branch_rb;
        struct branch_stack *bs_dst = tidq->last_branch;
        size_t nr = 0;

        /*
         * Set the number of records before early exit: ->nr is used to
         * determine how many branches to copy from ->entries.
         */
        bs_dst->nr = bs_src->nr;

        /*
         * Early exit when there is nothing to copy.
         */
        if (!bs_src->nr)
                return;

        /*
         * As bs_src->entries is a circular buffer, we need to copy from it in
         * two steps.  First, copy the branches from the most recently inserted
         * branch ->last_branch_pos until the end of bs_src->entries buffer.
         */
        nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
        memcpy(&bs_dst->entries[0],
               &bs_src->entries[tidq->last_branch_pos],
               sizeof(struct branch_entry) * nr);

        /*
         * If we wrapped around at least once, the branches from the beginning
         * of the bs_src->entries buffer and until the ->last_branch_pos element
         * are older valid branches: copy them over.  The total number of
         * branches copied over will be equal to the number of branches asked by
         * the user in last_branch_sz.
         */
        if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
                memcpy(&bs_dst->entries[nr],
                       &bs_src->entries[0],
                       sizeof(struct branch_entry) * tidq->last_branch_pos);
        }
}
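/*
 * Worked example for the two-step copy above (illustrative): with
 * last_branch_sz = 4, bs_src->entries = {e0, e1, e2, e3}, last_branch_pos = 2
 * and bs_src->nr >= 4, the first memcpy() places {e2, e3} at
 * bs_dst->entries[0..1] and the second appends {e0, e1} at
 * bs_dst->entries[2..3], i.e. most recent branch first.
 */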

static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
        tidq->last_branch_pos = 0;
        tidq->last_branch_rb->nr = 0;
}

static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
                                         u8 trace_chan_id, u64 addr)
{
        u8 instrBytes[2];

        cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
                           instrBytes, 0);
        /*
         * T32 instruction size is indicated by bits[15:11] of the first
         * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
         * denote a 32-bit instruction.
         */
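        /*
         * For example (illustrative encoding): a first halfword of 0xf3af
         * gives instrBytes[1] = 0xf3, and 0xf3 & 0xf8 = 0xf0 >= 0xe8, so the
         * instruction is 32-bit.
         */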
        return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
        /*
         * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
         * appear in samples.
         */
        if (packet->sample_type == CS_ETM_DISCONTINUITY ||
            packet->sample_type == CS_ETM_EXCEPTION)
                return 0;

        return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
        /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
        if (packet->sample_type == CS_ETM_DISCONTINUITY)
                return 0;

        return packet->end_addr - packet->last_instr_size;
}

static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
                                     u64 trace_chan_id,
                                     const struct cs_etm_packet *packet,
                                     u64 offset)
{
        if (packet->isa == CS_ETM_ISA_T32) {
                u64 addr = packet->start_addr;

                while (offset) {
                        addr += cs_etm__t32_instr_size(etmq,
                                                       trace_chan_id, addr);
                        offset--;
                }
                return addr;
        }

        /* Assume a 4 byte instruction size (A32/A64) */
        return packet->start_addr + offset * 4;
}
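/*
 * Illustrative: for an A64 packet starting at 0x400000, offset 3 resolves to
 * 0x40000c; for T32, the loop above walks the actual instruction sizes
 * instead.
 */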

static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
                                          struct cs_etm_traceid_queue *tidq)
{
        struct branch_stack *bs = tidq->last_branch_rb;
        struct branch_entry *be;

        /*
         * The branches are recorded in a circular buffer in reverse
         * chronological order: we start recording from the last element of the
         * buffer down.  After writing the first element of the stack, move the
         * insert position back to the end of the buffer.
         */
        if (!tidq->last_branch_pos)
                tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

        tidq->last_branch_pos -= 1;

        be       = &bs->entries[tidq->last_branch_pos];
        be->from = cs_etm__last_executed_instr(tidq->prev_packet);
        be->to   = cs_etm__first_executed_instr(tidq->packet);
        /* No support for mispredict */
        be->flags.mispred = 0;
        be->flags.predicted = 1;

        /*
         * Increment bs->nr until reaching the number of last branches asked by
         * the user on the command line.
         */
        if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
                bs->nr += 1;
}

static int cs_etm__inject_event(union perf_event *event,
                               struct perf_sample *sample, u64 type)
{
        event->header.size = perf_event__sample_event_size(sample, type, 0);
        return perf_event__synthesize_sample(event, type, 0, sample);
}

1427 static int
1428 cs_etm__get_trace(struct cs_etm_queue *etmq)
1429 {
1430         struct auxtrace_buffer *aux_buffer = etmq->buffer;
1431         struct auxtrace_buffer *old_buffer = aux_buffer;
1432         struct auxtrace_queue *queue;
1433
1434         queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1435
1436         aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1437
1438         /* If no more data, drop the previous auxtrace_buffer and return */
1439         if (!aux_buffer) {
1440                 if (old_buffer)
1441                         auxtrace_buffer__drop_data(old_buffer);
1442                 etmq->buf_len = 0;
1443                 return 0;
1444         }
1445
1446         etmq->buffer = aux_buffer;
1447
1448         /* If the aux_buffer doesn't have data associated, try to load it */
1449         if (!aux_buffer->data) {
1450                 /* get the file desc associated with the perf data file */
1451                 int fd = perf_data__fd(etmq->etm->session->data);
1452
1453                 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1454                 if (!aux_buffer->data)
1455                         return -ENOMEM;
1456         }
1457
1458         /* If valid, drop the previous buffer */
1459         if (old_buffer)
1460                 auxtrace_buffer__drop_data(old_buffer);
1461
1462         etmq->buf_used = 0;
1463         etmq->buf_len = aux_buffer->size;
1464         etmq->buf = aux_buffer->data;
1465
1466         return etmq->buf_len;
1467 }
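
/*
 * Note the contract of cs_etm__get_trace(): it returns the length of
 * the new buffer when more data is available, 0 once the queue is
 * exhausted, and -ENOMEM if a buffer's data cannot be loaded.  The
 * previous buffer is only dropped once its successor is in hand, so
 * etmq->buf never points at freed data.
 */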
1468
1469 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1470                                struct cs_etm_traceid_queue *tidq, pid_t tid,
1471                                ocsd_ex_level el)
1472 {
1473         struct machine *machine = cs_etm__get_machine(etmq, el);
1474
1475         if (tid != -1) {
1476                 thread__zput(tidq->thread);
1477                 tidq->thread = machine__find_thread(machine, -1, tid);
1478         }
1479
1480         /* Couldn't find a known thread */
1481         if (!tidq->thread)
1482                 tidq->thread = machine__idle_thread(machine);
1483
1484         tidq->el = el;
1485 }
1486
1487 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1488                             u8 trace_chan_id, ocsd_ex_level el)
1489 {
1490         struct cs_etm_traceid_queue *tidq;
1491
1492         tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1493         if (!tidq)
1494                 return -EINVAL;
1495
1496         cs_etm__set_thread(etmq, tidq, tid, el);
1497         return 0;
1498 }
1499
1500 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1501 {
1502         return !!etmq->etm->timeless_decoding;
1503 }
1504
1505 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1506                               u64 trace_chan_id,
1507                               const struct cs_etm_packet *packet,
1508                               struct perf_sample *sample)
1509 {
1510         /*
1511          * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1512          * packet, so directly bail out with 'insn_len' = 0.
1513          */
1514         if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1515                 sample->insn_len = 0;
1516                 return;
1517         }
1518
1519         /*
1520          * A T32 instruction can be either 16-bit or 32-bit; determine the
1521          * size by calling cs_etm__t32_instr_size().
1522          */
1523         if (packet->isa == CS_ETM_ISA_T32)
1524                 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1525                                                           sample->ip);
1526         /* Otherwise, A64 and A32 instructions are always 32-bit. */
1527         else
1528                 sample->insn_len = 4;
1529
1530         cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1531                            (void *)sample->insn, 0);
1532 }
1533
1534 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1535 {
1536         struct cs_etm_auxtrace *etm = etmq->etm;
1537
1538         if (etm->has_virtual_ts)
1539                 return tsc_to_perf_time(cs_timestamp, &etm->tc);
1540         else
1541                 return cs_timestamp;
1542 }
1543
1544 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1545                                                struct cs_etm_traceid_queue *tidq)
1546 {
1547         struct cs_etm_auxtrace *etm = etmq->etm;
1548         struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1549
1550         if (!etm->timeless_decoding && etm->has_virtual_ts)
1551                 return packet_queue->cs_timestamp;
1552         else
1553                 return etm->latest_kernel_timestamp;
1554 }
1555
1556 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1557                                             struct cs_etm_traceid_queue *tidq,
1558                                             u64 addr, u64 period)
1559 {
1560         int ret = 0;
1561         struct cs_etm_auxtrace *etm = etmq->etm;
1562         union perf_event *event = tidq->event_buf;
1563         struct perf_sample sample = {.ip = 0,};
1564
1565         event->sample.header.type = PERF_RECORD_SAMPLE;
1566         event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1567         event->sample.header.size = sizeof(struct perf_event_header);
1568
1569         /* Set time field based on etm auxtrace config. */
1570         sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1571
1572         sample.ip = addr;
1573         sample.pid = thread__pid(tidq->thread);
1574         sample.tid = thread__tid(tidq->thread);
1575         sample.id = etmq->etm->instructions_id;
1576         sample.stream_id = etmq->etm->instructions_id;
1577         sample.period = period;
1578         sample.cpu = tidq->packet->cpu;
1579         sample.flags = tidq->prev_packet->flags;
1580         sample.cpumode = event->sample.header.misc;
1581
1582         cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1583
1584         if (etm->synth_opts.last_branch)
1585                 sample.branch_stack = tidq->last_branch;
1586
1587         if (etm->synth_opts.inject) {
1588                 ret = cs_etm__inject_event(event, &sample,
1589                                            etm->instructions_sample_type);
1590                 if (ret)
1591                         return ret;
1592         }
1593
1594         ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1595
1596         if (ret)
1597                 pr_err(
1598                         "CS ETM Trace: failed to deliver instruction event, error %d\n",
1599                         ret);
1600
1601         return ret;
1602 }
1603
1604 /*
1605  * A CS ETM packet encodes an instruction range between a branch target
1606  * and the next taken branch. Generate a sample accordingly.
1607  */
1608 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1609                                        struct cs_etm_traceid_queue *tidq)
1610 {
1611         int ret = 0;
1612         struct cs_etm_auxtrace *etm = etmq->etm;
1613         struct perf_sample sample = {.ip = 0,};
1614         union perf_event *event = tidq->event_buf;
1615         struct dummy_branch_stack {
1616                 u64                     nr;
1617                 u64                     hw_idx;
1618                 struct branch_entry     entries;
1619         } dummy_bs;
1620         u64 ip;
1621
1622         ip = cs_etm__last_executed_instr(tidq->prev_packet);
1623
1624         event->sample.header.type = PERF_RECORD_SAMPLE;
1625         event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1626                                                      tidq->prev_packet_el);
1627         event->sample.header.size = sizeof(struct perf_event_header);
1628
1629         /* Set time field based on etm auxtrace config. */
1630         sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1631
1632         sample.ip = ip;
1633         sample.pid = thread__pid(tidq->prev_packet_thread);
1634         sample.tid = thread__tid(tidq->prev_packet_thread);
1635         sample.addr = cs_etm__first_executed_instr(tidq->packet);
1636         sample.id = etmq->etm->branches_id;
1637         sample.stream_id = etmq->etm->branches_id;
1638         sample.period = 1;
1639         sample.cpu = tidq->packet->cpu;
1640         sample.flags = tidq->prev_packet->flags;
1641         sample.cpumode = event->sample.header.misc;
1642
1643         cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1644                           &sample);
1645
1646         /*
1647          * perf report cannot handle events without a branch stack
1648          */
1649         if (etm->synth_opts.last_branch) {
1650                 dummy_bs = (struct dummy_branch_stack){
1651                         .nr = 1,
1652                         .hw_idx = -1ULL,
1653                         .entries = {
1654                                 .from = sample.ip,
1655                                 .to = sample.addr,
1656                         },
1657                 };
1658                 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1659         }
1660
1661         if (etm->synth_opts.inject) {
1662                 ret = cs_etm__inject_event(event, &sample,
1663                                            etm->branches_sample_type);
1664                 if (ret)
1665                         return ret;
1666         }
1667
1668         ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1669
1670         if (ret)
1671                 pr_err(
1672                 "CS ETM Trace: failed to deliver branch event, error %d\n",
1673                 ret);
1674
1675         return ret;
1676 }
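
/*
 * The cast of dummy_bs above relies on its layout matching struct
 * branch_stack: a u64 nr and u64 hw_idx followed by an array of struct
 * branch_entry, of which dummy_bs carries exactly one.  A sketch of the
 * resulting record for a hypothetical branch from 0x400100 to 0x400200:
 *
 *	sample.ip   = 0x400100;	(last executed instr of prev packet)
 *	sample.addr = 0x400200;	(first executed instr of packet)
 *	dummy_bs    = { .nr = 1, .hw_idx = -1ULL,
 *			.entries = { .from = 0x400100, .to = 0x400200 } };
 */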
1677
1678 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1679                                 struct perf_session *session)
1680 {
1681         struct evlist *evlist = session->evlist;
1682         struct evsel *evsel;
1683         struct perf_event_attr attr;
1684         bool found = false;
1685         u64 id;
1686         int err;
1687
1688         evlist__for_each_entry(evlist, evsel) {
1689                 if (evsel->core.attr.type == etm->pmu_type) {
1690                         found = true;
1691                         break;
1692                 }
1693         }
1694
1695         if (!found) {
1696                 pr_debug("No selected events with CoreSight Trace data\n");
1697                 return 0;
1698         }
1699
1700         memset(&attr, 0, sizeof(struct perf_event_attr));
1701         attr.size = sizeof(struct perf_event_attr);
1702         attr.type = PERF_TYPE_HARDWARE;
1703         attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1704         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1705                             PERF_SAMPLE_PERIOD;
1706         if (etm->timeless_decoding)
1707                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1708         else
1709                 attr.sample_type |= PERF_SAMPLE_TIME;
1710
1711         attr.exclude_user = evsel->core.attr.exclude_user;
1712         attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1713         attr.exclude_hv = evsel->core.attr.exclude_hv;
1714         attr.exclude_host = evsel->core.attr.exclude_host;
1715         attr.exclude_guest = evsel->core.attr.exclude_guest;
1716         attr.sample_id_all = evsel->core.attr.sample_id_all;
1717         attr.read_format = evsel->core.attr.read_format;
1718
1719         /* Create a new id value at a fixed offset from the evsel id */
1720         id = evsel->core.id[0] + 1000000000;
1721
1722         if (!id)
1723                 id = 1;
1724
1725         if (etm->synth_opts.branches) {
1726                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1727                 attr.sample_period = 1;
1728                 attr.sample_type |= PERF_SAMPLE_ADDR;
1729                 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1730                 if (err)
1731                         return err;
1732                 etm->branches_sample_type = attr.sample_type;
1733                 etm->branches_id = id;
1734                 id += 1;
1735                 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1736         }
1737
1738         if (etm->synth_opts.last_branch) {
1739                 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1740                 /*
1741                  * We don't use the hardware index, but the sample generation
1742                  * code uses the new format branch_stack with this field,
1743                  * so the event attributes must indicate that it's present.
1744                  */
1745                 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1746         }
1747
1748         if (etm->synth_opts.instructions) {
1749                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1750                 attr.sample_period = etm->synth_opts.period;
1751                 etm->instructions_sample_period = attr.sample_period;
1752                 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1753                 if (err)
1754                         return err;
1755                 etm->instructions_sample_type = attr.sample_type;
1756                 etm->instructions_id = id;
1757                 id += 1;
1758         }
1759
1760         return 0;
1761 }
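
/*
 * For example, if the CoreSight evsel's first id is 42 and both branch
 * and instruction samples are requested, the synthesized branch events
 * get id 1000000042 and the instruction events get id 1000000043.
 * Samples are matched back to these attributes via sample.id and
 * sample.stream_id in the two synth functions above.
 */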
1762
1763 static int cs_etm__sample(struct cs_etm_queue *etmq,
1764                           struct cs_etm_traceid_queue *tidq)
1765 {
1766         struct cs_etm_auxtrace *etm = etmq->etm;
1767         int ret;
1768         u8 trace_chan_id = tidq->trace_chan_id;
1769         u64 instrs_prev;
1770
1771         /* Get instructions remainder from previous packet */
1772         instrs_prev = tidq->period_instructions;
1773
1774         tidq->period_instructions += tidq->packet->instr_count;
1775
1776         /*
1777          * Record a branch when the last instruction in
1778          * PREV_PACKET is a branch.
1779          */
1780         if (etm->synth_opts.last_branch &&
1781             tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1782             tidq->prev_packet->last_instr_taken_branch)
1783                 cs_etm__update_last_branch_rb(etmq, tidq);
1784
1785         if (etm->synth_opts.instructions &&
1786             tidq->period_instructions >= etm->instructions_sample_period) {
1787                 /*
1788                  * Emit instruction sample periodically
1789                  * TODO: allow period to be defined in cycles and clock time
1790                  */
1791
1792                 /*
1793                  * Below diagram demonstrates the instruction samples
1794                  * generation flows:
1795                  *
1796                  *    Instrs     Instrs       Instrs       Instrs
1797                  *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1798                  *    |            |            |            |
1799                  *    V            V            V            V
1800                  *   --------------------------------------------------
1801                  *            ^                                  ^
1802                  *            |                                  |
1803                  *         Period                             Period
1804                  *    instructions(Pi)                   instructions(Pi')
1805                  *
1806                  *            |                                  |
1807                  *            \---------------- -----------------/
1808                  *                             V
1809                  *                 tidq->packet->instr_count
1810                  *
1811                  * Instrs Sample(n...) are the synthesised samples occurring
1812                  * every etm->instructions_sample_period instructions - as
1813          * defined on the perf command line.  Sample(n) is the last
1814          * sample before the current etm packet; samples n+1 to n+3
1815          * are generated from the current etm packet.
1816                  *
1817                  * tidq->packet->instr_count represents the number of
1818                  * instructions in the current etm packet.
1819                  *
1820                  * Period instructions (Pi) contains the number of
1821                  * instructions executed after the sample point(n) from the
1822                  * previous etm packet.  This will always be less than
1823                  * etm->instructions_sample_period.
1824                  *
1825          * When generating new samples, the instructions combine two
1826          * parts: the tail of the previous packet and the head of the
1827          * incoming packet, which together produce sample(n+1);
1828          * sample(n+2) and sample(n+3) each consume a full sample period
1829          * of instructions.  After sample(n+3), the remaining
1830          * instructions carry over to a later packet and are kept in
1831          * tidq->period_instructions for the next round of calculation.
1832                  */
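
                /*
                 * A worked example with hypothetical numbers, assuming a
                 * sample period of 1000: with instrs_prev = 600 and a new
                 * packet of 2500 instructions, period_instructions becomes
                 * 3100.  The first sample lands at packet offset 399
                 * (offset = 1000 - 600, minus one), the following ones at
                 * offsets 1399 and 2399, and the remaining 100 instructions
                 * are left in tidq->period_instructions for the next packet.
                 */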
1833
1834                 /*
1835                  * Get the initial offset into the current packet instructions;
1836                  * entry conditions ensure that instrs_prev is less than
1837                  * etm->instructions_sample_period.
1838                  */
1839                 u64 offset = etm->instructions_sample_period - instrs_prev;
1840                 u64 addr;
1841
1842                 /* Prepare last branches for instruction sample */
1843                 if (etm->synth_opts.last_branch)
1844                         cs_etm__copy_last_branch_rb(etmq, tidq);
1845
1846                 while (tidq->period_instructions >=
1847                                 etm->instructions_sample_period) {
1848                         /*
1849                          * Calculate the address of the sampled instruction (-1
1850                          * as sample is reported as though instruction has just
1851                          * been executed, but PC has not advanced to next
1852                          * instruction)
1853                          */
1854                         addr = cs_etm__instr_addr(etmq, trace_chan_id,
1855                                                   tidq->packet, offset - 1);
1856                         ret = cs_etm__synth_instruction_sample(
1857                                 etmq, tidq, addr,
1858                                 etm->instructions_sample_period);
1859                         if (ret)
1860                                 return ret;
1861
1862                         offset += etm->instructions_sample_period;
1863                         tidq->period_instructions -=
1864                                 etm->instructions_sample_period;
1865                 }
1866         }
1867
1868         if (etm->synth_opts.branches) {
1869                 bool generate_sample = false;
1870
1871                 /* Generate a sample when tracing resumes after a discontinuity */
1872                 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1873                         generate_sample = true;
1874
1875                 /* Generate a sample for a packet ending with a taken branch */
1876                 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1877                     tidq->prev_packet->last_instr_taken_branch)
1878                         generate_sample = true;
1879
1880                 if (generate_sample) {
1881                         ret = cs_etm__synth_branch_sample(etmq, tidq);
1882                         if (ret)
1883                                 return ret;
1884                 }
1885         }
1886
1887         cs_etm__packet_swap(etm, tidq);
1888
1889         return 0;
1890 }
1891
1892 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1893 {
1894          * When an exception packet is inserted, force
1895          * 'prev_packet->last_instr_taken_branch' to true whether or not the
1896          * last instruction in the previous range packet was a taken branch.
1897          * This ensures a branch sample is generated for the instruction range
1898          * executed before the exception traps to the kernel or returns.
1899          *
1900          * The exception packet contains dummy address values, so don't swap
1901          * PACKET with PREV_PACKET.  This keeps PREV_PACKET usable for
1902          * generating instruction and branch samples.
1903          * for generating instruction and branch samples.
1904          */
1905         if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1906                 tidq->prev_packet->last_instr_taken_branch = true;
1907
1908         return 0;
1909 }
1910
1911 static int cs_etm__flush(struct cs_etm_queue *etmq,
1912                          struct cs_etm_traceid_queue *tidq)
1913 {
1914         int err = 0;
1915         struct cs_etm_auxtrace *etm = etmq->etm;
1916
1917         /* Handle start tracing packet */
1918         if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1919                 goto swap_packet;
1920
1921         if (etmq->etm->synth_opts.last_branch &&
1922             etmq->etm->synth_opts.instructions &&
1923             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1924                 u64 addr;
1925
1926                 /* Prepare last branches for instruction sample */
1927                 cs_etm__copy_last_branch_rb(etmq, tidq);
1928
1929                 /*
1930                  * Generate a last branch event for the branches left in the
1931                  * circular buffer at the end of the trace.
1932                  *
1933                  * Use the address of the end of the last reported execution
1934          * range.
1935                  */
1936                 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1937
1938                 err = cs_etm__synth_instruction_sample(
1939                         etmq, tidq, addr,
1940                         tidq->period_instructions);
1941                 if (err)
1942                         return err;
1943
1944                 tidq->period_instructions = 0;
1945
1946         }
1947
1948         if (etm->synth_opts.branches &&
1949             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1950                 err = cs_etm__synth_branch_sample(etmq, tidq);
1951                 if (err)
1952                         return err;
1953         }
1954
1955 swap_packet:
1956         cs_etm__packet_swap(etm, tidq);
1957
1958         /* Reset last branches after flushing the trace */
1959         if (etm->synth_opts.last_branch)
1960                 cs_etm__reset_last_branch_rb(tidq);
1961
1962         return err;
1963 }
1964
1965 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1966                              struct cs_etm_traceid_queue *tidq)
1967 {
1968         int err;
1969
1970         /*
1971          * No new packets are coming and 'etmq->packet' contains the stale
1972          * packet left over from the previous packet swap; skip generating
1973          * a branch sample so the stale packet isn't used.
1974          *
1975          * In this case, only flush the branch stack and generate a last branch
1976          * event for the branches left in the circular buffer at the end of
1977          * the trace.
1978          */
1979         if (etmq->etm->synth_opts.last_branch &&
1980             etmq->etm->synth_opts.instructions &&
1981             tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1982                 u64 addr;
1983
1984                 /* Prepare last branches for instruction sample */
1985                 cs_etm__copy_last_branch_rb(etmq, tidq);
1986
1987                 /*
1988                  * Use the address of the end of the last reported execution
1989                  * range.
1990                  */
1991                 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1992
1993                 err = cs_etm__synth_instruction_sample(
1994                         etmq, tidq, addr,
1995                         tidq->period_instructions);
1996                 if (err)
1997                         return err;
1998
1999                 tidq->period_instructions = 0;
2000         }
2001
2002         return 0;
2003 }
2004 /*
2005  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2006  *                         if need be.
2007  * Returns:     < 0     if error
2008  *              = 0     if no more auxtrace_buffer to read
2009  *              > 0     if the current buffer isn't empty yet
2010  */
2011 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2012 {
2013         int ret;
2014
2015         if (!etmq->buf_len) {
2016                 ret = cs_etm__get_trace(etmq);
2017                 if (ret <= 0)
2018                         return ret;
2019                 /*
2020                  * We cannot assume consecutive blocks in the data file
2021                  * are contiguous; reset the decoder to force a re-sync.
2022                  */
2023                 ret = cs_etm_decoder__reset(etmq->decoder);
2024                 if (ret)
2025                         return ret;
2026         }
2027
2028         return etmq->buf_len;
2029 }
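
/*
 * Callers consume the tri-state return value with the same pattern,
 * e.g. in the decoder loops below:
 *
 *	err = cs_etm__get_data_block(etmq);
 *	if (err <= 0)
 *		return err;
 *	(then decode the etmq->buf_len bytes available in etmq->buf)
 */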
2030
2031 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2032                                  struct cs_etm_packet *packet,
2033                                  u64 end_addr)
2034 {
2035         /* Initialise to keep compiler happy */
2036         u16 instr16 = 0;
2037         u32 instr32 = 0;
2038         u64 addr;
2039
2040         switch (packet->isa) {
2041         case CS_ETM_ISA_T32:
2042                 /*
2043                  * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2044                  *
2045                  *  b'15         b'8
2046                  * +-----------------+--------+
2047                  * | 1 1 0 1 1 1 1 1 |  imm8  |
2048                  * +-----------------+--------+
2049                  *
2050                  * The specification only defines a 16-bit SVC encoding for
2051                  * T32 and no 32-bit one, so read only 2 bytes here as the
2052                  * T32 instruction size.
2053                  */
2054                 addr = end_addr - 2;
2055                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2056                                    (u8 *)&instr16, 0);
2057                 if ((instr16 & 0xFF00) == 0xDF00)
2058                         return true;
2059
2060                 break;
2061         case CS_ETM_ISA_A32:
2062                 /*
2063                  * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2064                  *
2065                  *  b'31 b'28 b'27 b'24
2066                  * +---------+---------+-------------------------+
2067                  * |  !1111  | 1 1 1 1 |        imm24            |
2068                  * +---------+---------+-------------------------+
2069                  */
2070                 addr = end_addr - 4;
2071                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2072                                    (u8 *)&instr32, 0);
2073                 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2074                     (instr32 & 0xF0000000) != 0xF0000000)
2075                         return true;
2076
2077                 break;
2078         case CS_ETM_ISA_A64:
2079                 /*
2080                  * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2081                  *
2082                  *  b'31               b'21           b'4     b'0
2083                  * +-----------------------+---------+-----------+
2084                  * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2085                  * +-----------------------+---------+-----------+
2086                  */
2087                 addr = end_addr - 4;
2088                 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2089                                    (u8 *)&instr32, 0);
2090                 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2091                         return true;
2092
2093                 break;
2094         case CS_ETM_ISA_UNKNOWN:
2095         default:
2096                 break;
2097         }
2098
2099         return false;
2100 }
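
/*
 * Illustrative encodings that satisfy the checks above: T32 "SVC #5" is
 * 0xdf05 and matches (instr16 & 0xFF00) == 0xDF00; A32 "SVC #0" is
 * 0xef000000, where bits [27:24] are b'1111 and the condition field
 * b'1110 is not b'1111; A64 "SVC #1" is 0xd4000021, and masking with
 * 0xFFE0001F strips the imm16 field to leave 0xd4000001.
 */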
2101
2102 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2103                                struct cs_etm_traceid_queue *tidq, u64 magic)
2104 {
2105         u8 trace_chan_id = tidq->trace_chan_id;
2106         struct cs_etm_packet *packet = tidq->packet;
2107         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2108
2109         if (magic == __perf_cs_etmv3_magic)
2110                 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2111                         return true;
2112
2113         /*
2114          * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2115          * HVC cases; we need to check whether it is an SVC instruction
2116          * based on the packet address.
2117          */
2118         if (magic == __perf_cs_etmv4_magic) {
2119                 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2120                     cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2121                                          prev_packet->end_addr))
2122                         return true;
2123         }
2124
2125         return false;
2126 }
2127
2128 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2129                                        u64 magic)
2130 {
2131         struct cs_etm_packet *packet = tidq->packet;
2132
2133         if (magic == __perf_cs_etmv3_magic)
2134                 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2135                     packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2136                     packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2137                     packet->exception_number == CS_ETMV3_EXC_IRQ ||
2138                     packet->exception_number == CS_ETMV3_EXC_FIQ)
2139                         return true;
2140
2141         if (magic == __perf_cs_etmv4_magic)
2142                 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2143                     packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2144                     packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2145                     packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2146                     packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2147                     packet->exception_number == CS_ETMV4_EXC_IRQ ||
2148                     packet->exception_number == CS_ETMV4_EXC_FIQ)
2149                         return true;
2150
2151         return false;
2152 }
2153
2154 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2155                                       struct cs_etm_traceid_queue *tidq,
2156                                       u64 magic)
2157 {
2158         u8 trace_chan_id = tidq->trace_chan_id;
2159         struct cs_etm_packet *packet = tidq->packet;
2160         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2161
2162         if (magic == __perf_cs_etmv3_magic)
2163                 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2164                     packet->exception_number == CS_ETMV3_EXC_HYP ||
2165                     packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2166                     packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2167                     packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2168                     packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2169                     packet->exception_number == CS_ETMV3_EXC_GENERIC)
2170                         return true;
2171
2172         if (magic == __perf_cs_etmv4_magic) {
2173                 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2174                     packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2175                     packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2176                     packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2177                         return true;
2178
2179                 /*
2180                  * For CS_ETMV4_EXC_CALL, instructions other than SVC
2181                  * (i.e. SMC, HVC) are taken as sync exceptions.
2182                  */
2183                 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2184                     !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2185                                           prev_packet->end_addr))
2186                         return true;
2187
2188                 /*
2189                  * ETMv4 has 5 bits for exception number; if the numbers
2190                  * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2191                  * they are implementation-defined exceptions.
2192                  *
2193                  * In this case, simply treat them as sync exceptions.
2194                  */
2195                 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2196                     packet->exception_number <= CS_ETMV4_EXC_END)
2197                         return true;
2198         }
2199
2200         return false;
2201 }
2202
2203 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2204                                     struct cs_etm_traceid_queue *tidq)
2205 {
2206         struct cs_etm_packet *packet = tidq->packet;
2207         struct cs_etm_packet *prev_packet = tidq->prev_packet;
2208         u8 trace_chan_id = tidq->trace_chan_id;
2209         u64 magic;
2210         int ret;
2211
2212         switch (packet->sample_type) {
2213         case CS_ETM_RANGE:
2214                 /*
2215                  * An immediate branch instruction with neither link nor
2216                  * return flag is a normal branch within the current
2217                  * function.
2218                  */
2219                 if (packet->last_instr_type == OCSD_INSTR_BR &&
2220                     packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2221                         packet->flags = PERF_IP_FLAG_BRANCH;
2222
2223                         if (packet->last_instr_cond)
2224                                 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2225                 }
2226
2227                 /*
2228                  * An immediate branch instruction with link (e.g. BL) is a
2229                  * branch instruction for a function call.
2230                  */
2231                 if (packet->last_instr_type == OCSD_INSTR_BR &&
2232                     packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2233                         packet->flags = PERF_IP_FLAG_BRANCH |
2234                                         PERF_IP_FLAG_CALL;
2235
2236                 /*
2237                  * An indirect branch instruction with link (e.g. BLR) is a
2238                  * branch instruction for a function call.
2239                  */
2240                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2241                     packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2242                         packet->flags = PERF_IP_FLAG_BRANCH |
2243                                         PERF_IP_FLAG_CALL;
2244
2245                 /*
2246                  * An indirect branch instruction with the subtype
2247                  * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint for a
2248                  * function return on A32/T32.
2249                  */
2250                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2251                     packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2252                         packet->flags = PERF_IP_FLAG_BRANCH |
2253                                         PERF_IP_FLAG_RETURN;
2254
2255                 /*
2256                  * An indirect branch instruction without link (e.g. BR) is
2257                  * usually used for a function return, especially for
2258                  * functions in dynamically linked libraries.
2259                  */
2260                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2261                     packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2262                         packet->flags = PERF_IP_FLAG_BRANCH |
2263                                         PERF_IP_FLAG_RETURN;
2264
2265                 /* Return instruction for function return. */
2266                 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2267                     packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2268                         packet->flags = PERF_IP_FLAG_BRANCH |
2269                                         PERF_IP_FLAG_RETURN;
2270
2271                 /*
2272                  * The decoder might insert a discontinuity in the middle of
2273                  * instruction packets; fix up prev_packet with the flag
2274                  * PERF_IP_FLAG_TRACE_BEGIN to indicate that trace restarted.
2275                  */
2276                 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2277                         prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2278                                               PERF_IP_FLAG_TRACE_BEGIN;
2279
2280                 /*
2281                  * If the previous packet is an exception return packet
2282                  * and the return address directly follows an SVC
2283                  * instruction, calibrate the previous packet's sample
2284                  * flags to PERF_IP_FLAG_SYSCALLRET.
2285                  */
2286                 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2287                                            PERF_IP_FLAG_RETURN |
2288                                            PERF_IP_FLAG_INTERRUPT) &&
2289                     cs_etm__is_svc_instr(etmq, trace_chan_id,
2290                                          packet, packet->start_addr))
2291                         prev_packet->flags = PERF_IP_FLAG_BRANCH |
2292                                              PERF_IP_FLAG_RETURN |
2293                                              PERF_IP_FLAG_SYSCALLRET;
2294                 break;
2295         case CS_ETM_DISCONTINUITY:
2296                 /*
2297                  * The trace is discontinuous; if the previous packet is an
2298                  * instruction range packet, set the PERF_IP_FLAG_TRACE_END
2299                  * flag on it.
2300                  */
2301                 if (prev_packet->sample_type == CS_ETM_RANGE)
2302                         prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2303                                               PERF_IP_FLAG_TRACE_END;
2304                 break;
2305         case CS_ETM_EXCEPTION:
2306                 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
2307                 if (ret)
2308                         return ret;
2309
2310                 /* The exception is for system call. */
2311                 if (cs_etm__is_syscall(etmq, tidq, magic))
2312                         packet->flags = PERF_IP_FLAG_BRANCH |
2313                                         PERF_IP_FLAG_CALL |
2314                                         PERF_IP_FLAG_SYSCALLRET;
2315                 /*
2316                  * These exceptions are triggered by external signals from
2317                  * the bus, interrupt controller, debug module, PE reset or halt.
2318                  */
2319                 else if (cs_etm__is_async_exception(tidq, magic))
2320                         packet->flags = PERF_IP_FLAG_BRANCH |
2321                                         PERF_IP_FLAG_CALL |
2322                                         PERF_IP_FLAG_ASYNC |
2323                                         PERF_IP_FLAG_INTERRUPT;
2324                 /*
2325                  * Otherwise, exception is caused by trap, instruction &
2326                  * data fault, or alignment errors.
2327                  */
2328                 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2329                         packet->flags = PERF_IP_FLAG_BRANCH |
2330                                         PERF_IP_FLAG_CALL |
2331                                         PERF_IP_FLAG_INTERRUPT;
2332
2333                 /*
2334                  * When an exception packet is inserted, it is not used
2335                  * standalone for generating samples but is affiliated with
2336                  * the previous instruction range packet; set the previous
2337                  * range packet's flags to tell perf this is a branch taken
2338                  * on an exception.
2339                  */
2340                 if (prev_packet->sample_type == CS_ETM_RANGE)
2341                         prev_packet->flags = packet->flags;
2342                 break;
2343         case CS_ETM_EXCEPTION_RET:
2344                 /*
2345                  * When an exception return packet is inserted, it is not
2346                  * used standalone for generating samples but is affiliated
2347                  * with the previous instruction range packet; set the
2348                  * previous range packet's flags to tell perf this is a
2349                  * branch returning from an exception.
2350                  *
2351                  * The exception return can be for either a system call or
2352                  * another exception type; unfortunately the packet doesn't
2353                  * carry exception-type information, so we cannot decide the
2354                  * type purely from the exception return packet.  Recording
2355                  * the exception number from the exception packet and
2356                  * reusing it for the return packet is not reliable either:
2357                  * the trace can be discontinuous or the interrupt can be
2358                  * nested, and in those two cases the recorded number cannot
2359                  * be applied to the exception return packet.
2360                  *
2361                  * For an exception return packet, we only need to know
2362                  * whether it is for a system call or for another type.
2363                  * That decision can be deferred until the next packet,
2364                  * which contains the return address: from that address we
2365                  * can read out the previous instruction, check whether it
2366                  * is a system call instruction, and then calibrate the
2367                  * sample flags as needed.
2368                  */
2369                 if (prev_packet->sample_type == CS_ETM_RANGE)
2370                         prev_packet->flags = PERF_IP_FLAG_BRANCH |
2371                                              PERF_IP_FLAG_RETURN |
2372                                              PERF_IP_FLAG_INTERRUPT;
2373                 break;
2374         case CS_ETM_EMPTY:
2375         default:
2376                 break;
2377         }
2378
2379         return 0;
2380 }
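
/*
 * A compact recap of the flag assignments above for the common cases
 * (flag names abbreviated, all include PERF_IP_FLAG_BRANCH):
 *
 *	B, B.cond (immediate)	BRANCH [| CONDITIONAL]
 *	BL, BLR			BRANCH | CALL
 *	BR, RET			BRANCH | RETURN
 *	SVC exception		BRANCH | CALL | SYSCALLRET
 *	IRQ/FIQ etc. (async)	BRANCH | CALL | ASYNC | INTERRUPT
 *	other sync exceptions	BRANCH | CALL | INTERRUPT
 */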
2381
2382 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2383 {
2384         int ret = 0;
2385         size_t processed = 0;
2386
2387         /*
2388          * Packets are decoded and added to the decoder's packet queue
2389          * until the decoder packet processing callback has requested that
2390          * processing stops or there is nothing left in the buffer.  The
2391          * normal conditions that stop processing are a timestamp packet or
2392          * a full decoder packet queue.
2393          */
2394         ret = cs_etm_decoder__process_data_block(etmq->decoder,
2395                                                  etmq->offset,
2396                                                  &etmq->buf[etmq->buf_used],
2397                                                  etmq->buf_len,
2398                                                  &processed);
2399         if (ret)
2400                 goto out;
2401
2402         etmq->offset += processed;
2403         etmq->buf_used += processed;
2404         etmq->buf_len -= processed;
2405
2406 out:
2407         return ret;
2408 }
2409
2410 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2411                                          struct cs_etm_traceid_queue *tidq)
2412 {
2413         int ret;
2414         struct cs_etm_packet_queue *packet_queue;
2415
2416         packet_queue = &tidq->packet_queue;
2417
2418         /* Process each packet in this chunk */
2419         while (1) {
2420                 ret = cs_etm_decoder__get_packet(packet_queue,
2421                                                  tidq->packet);
2422                 if (ret <= 0)
2423                         /*
2424                          * Stop processing this chunk on
2425                          * end of data or error
2426                          */
2427                         break;
2428
2429                 /*
2430                  * Packet addresses are swapped during packet
2431                  * handling in the switch() statement below, so
2432                  * the sample flags must be set beforehand in
2433                  * order to use the address information prior
2434                  * to the swap.
2435                  */
2436                 ret = cs_etm__set_sample_flags(etmq, tidq);
2437                 if (ret < 0)
2438                         break;
2439
2440                 switch (tidq->packet->sample_type) {
2441                 case CS_ETM_RANGE:
2442                         /*
2443                          * If the packet contains an instruction
2444                          * range, generate instruction sequence
2445                          * events.
2446                          */
2447                         cs_etm__sample(etmq, tidq);
2448                         break;
2449                 case CS_ETM_EXCEPTION:
2450                 case CS_ETM_EXCEPTION_RET:
2451                         /*
2452                          * When an exception packet arrives,
2453                          * make sure the previous instruction
2454                          * range packet is handled properly.
2455                          */
2456                         cs_etm__exception(tidq);
2457                         break;
2458                 case CS_ETM_DISCONTINUITY:
2459                         /*
2460                          * Discontinuity in the trace; flush
2461                          * the previous branch stack.
2462                          */
2463                         cs_etm__flush(etmq, tidq);
2464                         break;
2465                 case CS_ETM_EMPTY:
2466                         /*
2467                          * We should never receive an empty
2468                          * packet; report an error.
2469                          */
2470                         pr_err("CS ETM Trace: empty packet\n");
2471                         return -EINVAL;
2472                 default:
2473                         break;
2474                 }
2475         }
2476
2477         return ret;
2478 }
2479
2480 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2481 {
2482         int idx;
2483         struct int_node *inode;
2484         struct cs_etm_traceid_queue *tidq;
2485         struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2486
2487         intlist__for_each_entry(inode, traceid_queues_list) {
2488                 idx = (int)(intptr_t)inode->priv;
2489                 tidq = etmq->traceid_queues[idx];
2490
2491                 /* Ignore return value */
2492                 cs_etm__process_traceid_queue(etmq, tidq);
2493         }
2494 }
2495
2496 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2497 {
2498         int err = 0;
2499         struct cs_etm_traceid_queue *tidq;
2500
2501         tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2502         if (!tidq)
2503                 return -EINVAL;
2504
2505         /* Go through each buffer in the queue and decode them one by one */
2506         while (1) {
2507                 err = cs_etm__get_data_block(etmq);
2508                 if (err <= 0)
2509                         return err;
2510
2511                 /* Run trace decoder until buffer consumed or end of trace */
2512                 do {
2513                         err = cs_etm__decode_data_block(etmq);
2514                         if (err)
2515                                 return err;
2516
2517                         /*
2518                          * Process each packet in this chunk; if an error
2519                          * occurs there is nothing to do other than hope the
2520                          * next chunk will be better.
2521                          */
2522                         err = cs_etm__process_traceid_queue(etmq, tidq);
2523
2524                 } while (etmq->buf_len);
2525
2526                 if (err == 0)
2527                         /* Flush any remaining branch stack entries */
2528                         err = cs_etm__end_block(etmq, tidq);
2529         }
2530
2531         return err;
2532 }
2533
2534 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2535 {
2536         int idx, err = 0;
2537         struct cs_etm_traceid_queue *tidq;
2538         struct int_node *inode;
2539
2540         /* Go through each buffer in the queue and decode them one by one */
2541         while (1) {
2542                 err = cs_etm__get_data_block(etmq);
2543                 if (err <= 0)
2544                         return err;
2545
2546                 /* Run trace decoder until buffer consumed or end of trace */
2547                 do {
2548                         err = cs_etm__decode_data_block(etmq);
2549                         if (err)
2550                                 return err;
2551
2552                         /*
2553                          * cs_etm__run_per_thread_timeless_decoder() runs on a
2554                          * single traceID queue because each TID has a separate
2555                          * buffer. But here in per-cpu mode we need to iterate
2556                          * over each channel instead.
2557                          */
2558                         intlist__for_each_entry(inode,
2559                                                 etmq->traceid_queues_list) {
2560                                 idx = (int)(intptr_t)inode->priv;
2561                                 tidq = etmq->traceid_queues[idx];
2562                                 cs_etm__process_traceid_queue(etmq, tidq);
2563                         }
2564                 } while (etmq->buf_len);
2565
2566                 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2567                         idx = (int)(intptr_t)inode->priv;
2568                         tidq = etmq->traceid_queues[idx];
2569                         /* Flush any remaining branch stack entries */
2570                         err = cs_etm__end_block(etmq, tidq);
2571                         if (err)
2572                                 return err;
2573                 }
2574         }
2575
2576         return err;
2577 }
2578
2579 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2580                                            pid_t tid)
2581 {
2582         unsigned int i;
2583         struct auxtrace_queues *queues = &etm->queues;
2584
2585         for (i = 0; i < queues->nr_queues; i++) {
2586                 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2587                 struct cs_etm_queue *etmq = queue->priv;
2588                 struct cs_etm_traceid_queue *tidq;
2589
2590                 if (!etmq)
2591                         continue;
2592
2593                 if (etm->per_thread_decoding) {
2594                         tidq = cs_etm__etmq_get_traceid_queue(
2595                                 etmq, CS_ETM_PER_THREAD_TRACEID);
2596
2597                         if (!tidq)
2598                                 continue;
2599
2600                         if (tid == -1 || thread__tid(tidq->thread) == tid)
2601                                 cs_etm__run_per_thread_timeless_decoder(etmq);
2602                 } else
2603                         cs_etm__run_per_cpu_timeless_decoder(etmq);
2604         }
2605
2606         return 0;
2607 }
2608
2609 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2610 {
2611         int ret = 0;
2612         unsigned int cs_queue_nr, queue_nr, i;
2613         u8 trace_chan_id;
2614         u64 cs_timestamp;
2615         struct auxtrace_queue *queue;
2616         struct cs_etm_queue *etmq;
2617         struct cs_etm_traceid_queue *tidq;
2618
2619         /*
2620          * Pre-populate the heap with one entry from each queue so that we can
2621          * start processing in time order across all queues.
2622          */
2623         for (i = 0; i < etm->queues.nr_queues; i++) {
2624                 etmq = etm->queues.queue_array[i].priv;
2625                 if (!etmq)
2626                         continue;
2627
2628                 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2629                 if (ret)
2630                         return ret;
2631         }
2632
2633         while (1) {
2634                 if (!etm->heap.heap_cnt)
2635                         break;
2636
2637                 /* Take the entry at the top of the min heap */
2638                 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2639                 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2640                 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2641                 queue = &etm->queues.queue_array[queue_nr];
2642                 etmq = queue->priv;
2643
2644                 /*
2645                  * Remove the top entry from the heap since we are about
2646                  * to process it.
2647                  */
2648                 auxtrace_heap__pop(&etm->heap);
2649
2650                 tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2651                 if (!tidq) {
2652                         /*
2653                          * which means something somewhere went very wrong.  We
2654                          * have no choice but to exit.
2655                          * other choice than simply exit.
2656                          */
2657                         ret = -EINVAL;
2658                         goto out;
2659                 }
2660
2661                 /*
2662                  * Packets associated with this timestamp are already in
2663                  * the etmq's traceID queue, so process them.
2664                  */
2665                 ret = cs_etm__process_traceid_queue(etmq, tidq);
2666                 if (ret < 0)
2667                         goto out;
2668
2669                 /*
2670                  * Packets for this timestamp have been processed, time to
2671                  * move on to the next timestamp, fetching a new auxtrace_buffer
2672                  * if need be.
2673                  */
2674 refetch:
2675                 ret = cs_etm__get_data_block(etmq);
2676                 if (ret < 0)
2677                         goto out;
2678
2679                 /*
2680                  * No more auxtrace_buffers to process in this etmq, simply
2681                  * move on to another entry in the auxtrace_heap.
2682                  */
2683                 if (!ret)
2684                         continue;
2685
2686                 ret = cs_etm__decode_data_block(etmq);
2687                 if (ret)
2688                         goto out;
2689
2690                 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2691
2692                 if (!cs_timestamp) {
2693                         /*
2694                          * Function cs_etm__decode_data_block() returns when
2695                          * there is no more trace data to decode in the current
2696                          * auxtrace_buffer OR when a timestamp has been
2697                          * encountered on any of the traceID queues.  Since we
2698                          * did not get a timestamp, there is no more trace data
2699                          * to process in this auxtrace_buffer.  As such, empty
2700                          * and flush all traceID queues.
2701                          */
2702                         cs_etm__clear_all_traceid_queues(etmq);
2703
2704                         /* Fetch another auxtrace_buffer for this etmq */
2705                         goto refetch;
2706                 }
2707
2708                 /*
2709                  * Add to the min heap the timestamp for packets that have
2710                  * just been decoded.  They will be processed and synthesized
2711                  * during the next call to cs_etm__process_traceid_queue() for
2712                  * this queue/traceID.
2713                  */
2714                 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2715                 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2716         }
2717
2718         for (i = 0; i < etm->queues.nr_queues; i++) {
2719                 struct int_node *inode;
2720
2721                 etmq = etm->queues.queue_array[i].priv;
2722                 if (!etmq)
2723                         continue;
2724
2725                 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2726                         int idx = (int)(intptr_t)inode->priv;
2727
2728                         /* Flush any remaining branch stack entries */
2729                         tidq = etmq->traceid_queues[idx];
2730                         ret = cs_etm__end_block(etmq, tidq);
2731                         if (ret)
2732                                 return ret;
2733                 }
2734         }
2735 out:
2736         return ret;
2737 }
2738
2739 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2740                                         union perf_event *event)
2741 {
2742         struct thread *th;
2743
2744         if (etm->timeless_decoding)
2745                 return 0;
2746
2747         /*
2748          * Add the tid/pid to the log so that we can get a match when we get a
2749          * contextID from the decoder. Only track for the host: only kernel
2750          * trace is supported for guests, which wouldn't need pids, so this should
2751          * be fine.
2752          */
2753         th = machine__findnew_thread(&etm->session->machines.host,
2754                                      event->itrace_start.pid,
2755                                      event->itrace_start.tid);
2756         if (!th)
2757                 return -ENOMEM;
2758
2759         thread__put(th);
2760
2761         return 0;
2762 }
2763
2764 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2765                                            union perf_event *event)
2766 {
2767         struct thread *th;
2768         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2769
2770         /*
2771          * Context switches in per-thread mode are irrelevant since perf
2772          * will start/stop tracing as the process is scheduled.
2773          */
2774         if (etm->timeless_decoding)
2775                 return 0;
2776
2777         /*
2778          * SWITCH_IN events carry the next process to be switched out while
2779          * SWITCH_OUT events carry the process to be switched in.  As such
2780          * we don't care about IN events.
2781          */
2782         if (!out)
2783                 return 0;
2784
2785         /*
2786          * Add the tid/pid to the log so that we can get a match when we get a
2787          * contextID from the decoder. Only track for the host: only kernel
2788          * trace is supported for guests, which wouldn't need pids, so this should
2789          * be fine.
2790          */
2791         th = machine__findnew_thread(&etm->session->machines.host,
2792                                      event->context_switch.next_prev_pid,
2793                                      event->context_switch.next_prev_tid);
2794         if (!th)
2795                 return -ENOMEM;
2796
2797         thread__put(th);
2798
2799         return 0;
2800 }
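/*
 * A concrete example of the SWITCH_CPU_WIDE convention relied on above
 * (per the perf event ABI): when task A is switched out and task B is
 * switched in, the SWITCH_OUT record carries next_prev_{pid,tid} == B,
 * i.e. the task whose trace is about to start, which is why only OUT
 * events need to be tracked here.
 */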
2801
2802 static int cs_etm__process_event(struct perf_session *session,
2803                                  union perf_event *event,
2804                                  struct perf_sample *sample,
2805                                  const struct perf_tool *tool)
2806 {
2807         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2808                                                    struct cs_etm_auxtrace,
2809                                                    auxtrace);
2810
2811         if (dump_trace)
2812                 return 0;
2813
2814         if (!tool->ordered_events) {
2815                 pr_err("CoreSight ETM Trace requires ordered events\n");
2816                 return -EINVAL;
2817         }
2818
2819         switch (event->header.type) {
2820         case PERF_RECORD_EXIT:
2821                 /*
2822                  * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2823                  * start the decode because we know there will be no more trace from
2824                  * this thread. All this does is emit samples earlier than waiting for
2825                  * the flush in other modes, but with timestamps it makes sense to wait
2826                  * for flush so that events from different threads are interleaved
2827                  * properly.
2828                  */
2829                 if (etm->per_thread_decoding && etm->timeless_decoding)
2830                         return cs_etm__process_timeless_queues(etm,
2831                                                                event->fork.tid);
2832                 break;
2833
2834         case PERF_RECORD_ITRACE_START:
2835                 return cs_etm__process_itrace_start(etm, event);
2836
2837         case PERF_RECORD_SWITCH_CPU_WIDE:
2838                 return cs_etm__process_switch_cpu_wide(etm, event);
2839
2840         case PERF_RECORD_AUX:
2841                 /*
2842                  * Record the latest kernel timestamp available in the header
2843                  * for samples so that synthesised samples occur from this point
2844                  * onwards.
2845                  */
2846                 if (sample->time && (sample->time != (u64)-1))
2847                         etm->latest_kernel_timestamp = sample->time;
2848                 break;
2849
2850         default:
2851                 break;
2852         }
2853
2854         return 0;
2855 }
2856
2857 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2858                              struct perf_record_auxtrace *event)
2859 {
2860         struct auxtrace_buffer *buf;
2861         unsigned int i;
2862         /*
2863          * Find all buffers with the same reference in the queues and dump them.
2864          * This is because the queues can contain multiple entries of the same
2865          * buffer that were split on aux records.
2866          */
2867         for (i = 0; i < etm->queues.nr_queues; ++i)
2868                 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2869                         if (buf->reference == event->reference)
2870                                 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2871 }
2872
2873 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2874                                           union perf_event *event,
2875                                           const struct perf_tool *tool __maybe_unused)
2876 {
2877         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2878                                                    struct cs_etm_auxtrace,
2879                                                    auxtrace);
2880         if (!etm->data_queued) {
2881                 struct auxtrace_buffer *buffer;
2882                 off_t  data_offset;
2883                 int fd = perf_data__fd(session->data);
2884                 bool is_pipe = perf_data__is_pipe(session->data);
2885                 int err;
2886                 int idx = event->auxtrace.idx;
2887
2888                 if (is_pipe)
2889                         data_offset = 0;
2890                 else {
2891                         data_offset = lseek(fd, 0, SEEK_CUR);
2892                         if (data_offset == -1)
2893                                 return -errno;
2894                 }
2895
2896                 err = auxtrace_queues__add_event(&etm->queues, session,
2897                                                  event, data_offset, &buffer);
2898                 if (err)
2899                         return err;
2900
2901                 if (dump_trace)
2902                         if (auxtrace_buffer__get_data(buffer, fd)) {
2903                                 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2904                                 auxtrace_buffer__put_data(buffer);
2905                         }
2906         } else if (dump_trace)
2907                 dump_queued_data(etm, &event->auxtrace);
2908
2909         return 0;
2910 }
2911
2912 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2913 {
2914         struct evsel *evsel;
2915         struct evlist *evlist = etm->session->evlist;
2916
2917         /* Override timeless mode with user input from --itrace=Z */
2918         if (etm->synth_opts.timeless_decoding) {
2919                 etm->timeless_decoding = true;
2920                 return 0;
2921         }
2922
2923         /*
2924          * Find the cs_etm evsel and look at what its timestamp setting was
2925          */
2926         evlist__for_each_entry(evlist, evsel)
2927                 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2928                         etm->timeless_decoding =
2929                                 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2930                         return 0;
2931                 }
2932
2933         pr_err("CS ETM: Couldn't find ETM evsel\n");
2934         return -EINVAL;
2935 }
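/*
 * Usage sketch, based on perf's documented itrace options: timeless
 * decoding can be forced from the command line with the 'Z' option,
 * which sets the synth_opts.timeless_decoding flag checked above, e.g.:
 *
 *   perf report --itrace=Z
 *   perf script --itrace=Zi
 */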
2936
2937 /*
2938  * Read a single cpu parameter block from the auxtrace_info priv block.
2939  *
2940  * For version 1 there is a per cpu nr_params entry. If we are handling a
2941  * version 1 file, then there may be fewer, the same, or more params
2942  * indicated by this value than the compile time number we understand.
2943  *
2944  * For a version 0 info block, there are a fixed number, and we need to
2945  * fill out the nr_param value in the metadata we create.
2946  */
2947 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2948                                     int out_blk_size, int nr_params_v0)
2949 {
2950         u64 *metadata = NULL;
2951         int hdr_version;
2952         int nr_in_params, nr_out_params, nr_cmn_params;
2953         int i, k;
2954
2955         metadata = zalloc(sizeof(*metadata) * out_blk_size);
2956         if (!metadata)
2957                 return NULL;
2958
2959         /* read block current index & version */
2960         i = *buff_in_offset;
2961         hdr_version = buff_in[CS_HEADER_VERSION];
2962
2963         if (!hdr_version) {
2964         /* read version 0 info block into a version 1 metadata block  */
2965                 nr_in_params = nr_params_v0;
2966                 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2967                 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2968                 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2969                 /* remaining block params at offset +1 from source */
2970                 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2971                         metadata[k + 1] = buff_in[i + k];
2972                 /* version 0 has 2 common params */
2973                 nr_cmn_params = 2;
2974         } else {
2975         /* read version 1 info block - input and output nr_params may differ */
2976                 /* version 1 has 3 common params */
2977                 nr_cmn_params = 3;
2978                 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2979
2980                 /* if input has more params than output - skip excess */
2981                 nr_out_params = nr_in_params + nr_cmn_params;
2982                 if (nr_out_params > out_blk_size)
2983                         nr_out_params = out_blk_size;
2984
2985                 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2986                         metadata[k] = buff_in[i + k];
2987
2988                 /* record the actual nr params we copied */
2989                 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2990         }
2991
2992         /* adjust in offset by number of in params used */
2993         i += nr_in_params + nr_cmn_params;
2994         *buff_in_offset = i;
2995         return metadata;
2996 }
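/*
 * A rough sketch of the two input layouts handled above, derived from
 * the code rather than a formal spec. Version 0 blocks carry 2 common
 * params (MAGIC, CPU) followed directly by a fixed number of trace
 * params; version 1 blocks insert NR_TRC_PARAMS as a third common param:
 *
 *   v0: [MAGIC][CPU][param 0]...[param n-1]
 *   v1: [MAGIC][CPU][NR_TRC_PARAMS][param 0]...[param n-1]
 *
 * The output metadata always uses the v1 layout, which is why the v0
 * path above copies each trace param to offset +1 from its source slot.
 */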
2997
2998 /**
2999  * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3000  * on the bounds of aux_event, if it matches the buffer that's at
3001  * file_offset.
3002  *
3003  * Normally, whole auxtrace buffers would be added to the queue. But we
3004  * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3005  * is reset across each buffer, so splitting the buffers up in advance has
3006  * the same effect.
3007  */
3008 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3009                                       struct perf_record_aux *aux_event, struct perf_sample *sample)
3010 {
3011         int err;
3012         char buf[PERF_SAMPLE_MAX_SIZE];
3013         union perf_event *auxtrace_event_union;
3014         struct perf_record_auxtrace *auxtrace_event;
3015         union perf_event auxtrace_fragment;
3016         __u64 aux_offset, aux_size;
3017         enum cs_etm_format format;
3018
3019         struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3020                                                    struct cs_etm_auxtrace,
3021                                                    auxtrace);
3022
3023         /*
3024          * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3025          * from looping through the auxtrace index.
3026          */
3027         err = perf_session__peek_event(session, file_offset, buf,
3028                                        PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3029         if (err)
3030                 return err;
3031         auxtrace_event = &auxtrace_event_union->auxtrace;
3032         if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3033                 return -EINVAL;
3034
3035         if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3036                 auxtrace_event->header.size != sz) {
3037                 return -EINVAL;
3038         }
3039
3040         /*
3041          * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3042          * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3043          * CPU, as we always set this for the AUX_OUTPUT_HW_ID event.
3044          * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3045          * Return 'not found' if mismatch.
3046          */
3047         if (auxtrace_event->cpu == (__u32) -1) {
3048                 etm->per_thread_decoding = true;
3049                 if (auxtrace_event->tid != sample->tid)
3050                         return 1;
3051         } else if (auxtrace_event->cpu != sample->cpu) {
3052                 if (etm->per_thread_decoding) {
3053                         /*
3054                          * Found a per-cpu buffer after a per-thread one was
3055                          * already found
3056                          */
3057                         pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3058                         return -EINVAL;
3059                 }
3060                 return 1;
3061         }
3062
3063         if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3064                 /*
3065                  * Clamp size in snapshot mode. The buffer size is clamped in
3066                  * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3067                  * the buffer size.
3068                  */
3069                 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3070
3071                 /*
3072                  * In this mode, the head also points to the end of the buffer so aux_offset
3073                  * needs to have the size subtracted so it points to the beginning as in normal mode
3074                  */
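                /*
                 * Worked example with hypothetical values: if the head
                 * (aux_event->aux_offset) is 0x1400 and the clamped
                 * aux_size is 0x400, the fragment spans [0x1000, 0x1400)
                 * and aux_offset below becomes 0x1000.
                 */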
3075                 aux_offset = aux_event->aux_offset - aux_size;
3076         } else {
3077                 aux_size = aux_event->aux_size;
3078                 aux_offset = aux_event->aux_offset;
3079         }
3080
3081         if (aux_offset >= auxtrace_event->offset &&
3082             aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3083                 struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
3084
3085                 /*
3086                  * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3087                  * based on the sizes of the aux event, and queue that fragment.
3088                  */
3089                 auxtrace_fragment.auxtrace = *auxtrace_event;
3090                 auxtrace_fragment.auxtrace.size = aux_size;
3091                 auxtrace_fragment.auxtrace.offset = aux_offset;
3092                 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3093
3094                 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3095                           " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3096                 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3097                                                  file_offset, NULL);
3098                 if (err)
3099                         return err;
3100
3101                 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3102                                 UNFORMATTED : FORMATTED;
3103                 if (etmq->format != UNSET && format != etmq->format) {
3104                         pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3105                         return -EINVAL;
3106                 }
3107                 etmq->format = format;
3108                 return 0;
3109         }
3110
3111         /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3112         return 1;
3113 }
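/*
 * Return-value convention used when matching fragments, as seen above and
 * in the search loop below: 0 == fragment queued, 1 == this buffer didn't
 * match (keep searching the index), negative == hard error.
 */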
3114
3115 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3116                                         u64 offset __maybe_unused, void *data __maybe_unused)
3117 {
3118         /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3119         if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3120                 (*(int *)data)++; /* increment found count */
3121                 return cs_etm__process_aux_output_hw_id(session, event);
3122         }
3123         return 0;
3124 }
3125
3126 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3127                                         u64 offset __maybe_unused, void *data __maybe_unused)
3128 {
3129         struct perf_sample sample;
3130         int ret;
3131         struct auxtrace_index_entry *ent;
3132         struct auxtrace_index *auxtrace_index;
3133         struct evsel *evsel;
3134         size_t i;
3135
3136         /* Don't care about any other events, we're only queuing buffers for AUX events */
3137         if (event->header.type != PERF_RECORD_AUX)
3138                 return 0;
3139
3140         if (event->header.size < sizeof(struct perf_record_aux))
3141                 return -EINVAL;
3142
3143         /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3144         if (!event->aux.aux_size)
3145                 return 0;
3146
3147         /*
3148          * Parse the sample; we need the sample_id_all data that comes after the event so that the
3149          * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3150          */
3151         evsel = evlist__event2evsel(session->evlist, event);
3152         if (!evsel)
3153                 return -EINVAL;
3154         ret = evsel__parse_sample(evsel, event, &sample);
3155         if (ret)
3156                 return ret;
3157
3158         /*
3159          * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3160          */
3161         list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3162                 for (i = 0; i < auxtrace_index->nr; i++) {
3163                         ent = &auxtrace_index->entries[i];
3164                         ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3165                                                          ent->sz, &event->aux, &sample);
3166                         /*
3167                          * Stop search on error or successful values. Continue search on
3168                          * 1 ('not found')
3169                          */
3170                         if (ret != 1)
3171                                 return ret;
3172                 }
3173         }
3174
3175         /*
3176          * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3177          * don't exit with an error because it will still be possible to decode other aux records.
3178          */
3179         pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3180                " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3181         return 0;
3182 }
3183
3184 static int cs_etm__queue_aux_records(struct perf_session *session)
3185 {
3186         struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3187                                                                 struct auxtrace_index, list);
3188         if (index && index->nr > 0)
3189                 return perf_session__peek_events(session, session->header.data_offset,
3190                                                  session->header.data_size,
3191                                                  cs_etm__queue_aux_records_cb, NULL);
3192
3193         /*
3194          * We would get here if there are no entries in the index (either no auxtrace
3195          * buffers or no index at all). Fail silently as there is the possibility of
3196          * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3197          * false.
3198          *
3199          * In that scenario, buffers will not be split by AUX records.
3200          */
3201         return 0;
3202 }
3203
3204 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3205                                   (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
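/*
 * Reading note for the macro above: despite its name, HAS_PARAM evaluates
 * to true when the parameter is *not* present, i.e. when the per-cpu
 * nr_params count is too small to cover it. Expansion sketch:
 *
 *   HAS_PARAM(j, ETMV4, TS_SOURCE)
 *     => metadata[j][CS_ETM_NR_TRC_PARAMS] <=
 *            (CS_ETMV4_TS_SOURCE - CS_ETM_COMMON_BLK_MAX_V1)
 *
 * so the checks in cs_etm__has_virtual_ts() below bail out when the
 * timestamp source parameter is missing or not set to virtual (1).
 */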
3206
3207 /*
3208  * Loop through the ETMs and return false if we find at least one where ts_source != 1 (virtual
3209  * timestamps).
3210  */
3211 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3212 {
3213         int j;
3214
3215         for (j = 0; j < num_cpu; j++) {
3216                 switch (metadata[j][CS_ETM_MAGIC]) {
3217                 case __perf_cs_etmv4_magic:
3218                         if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3219                                 return false;
3220                         break;
3221                 case __perf_cs_ete_magic:
3222                         if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3223                                 return false;
3224                         break;
3225                 default:
3226                         /* Unknown / unsupported magic number. */
3227                         return false;
3228                 }
3229         }
3230         return true;
3231 }
3232
3233 /* map trace ids to correct metadata block, from information in metadata */
3234 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3235                                           u64 **metadata)
3236 {
3237         u64 cs_etm_magic;
3238         u8 trace_chan_id;
3239         int i, err;
3240
3241         for (i = 0; i < num_cpu; i++) {
3242                 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3243                 switch (cs_etm_magic) {
3244                 case __perf_cs_etmv3_magic:
3245                         metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3246                         trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3247                         break;
3248                 case __perf_cs_etmv4_magic:
3249                 case __perf_cs_ete_magic:
3250                         metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3251                         trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3252                         break;
3253                 default:
3254                         /* unknown magic number */
3255                         return -EINVAL;
3256                 }
3257                 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3258                 if (err)
3259                         return err;
3260         }
3261         return 0;
3262 }
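/*
 * Note on the masking above, assuming the definitions in
 * linux/coresight-pmu.h: CORESIGHT_TRACE_ID_VAL_MASK keeps only the
 * 7-bit trace ID value, stripping any flag bits that may be carried in
 * the same metadata word.
 */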
3263
3264 /*
3265  * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3266  * (formatted or not) packets to create the decoders.
3267  */
3268 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3269 {
3270         struct cs_etm_decoder_params d_params;
3271         struct cs_etm_trace_params  *t_params;
3272         int decoders = intlist__nr_entries(etmq->traceid_list);
3273
3274         if (decoders == 0)
3275                 return 0;
3276
3277         /*
3278          * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3279          * needed.
3280          */
3281         if (etmq->format == UNFORMATTED)
3282                 assert(decoders == 1);
3283
3284         /* Use metadata to fill in trace parameters for trace decoder */
3285         t_params = zalloc(sizeof(*t_params) * decoders);
3286
3287         if (!t_params)
3288                 goto out_free;
3289
3290         if (cs_etm__init_trace_params(t_params, etmq))
3291                 goto out_free;
3292
3293         /* Set decoder parameters to decode trace packets */
3294         if (cs_etm__init_decoder_params(&d_params, etmq,
3295                                         dump_trace ? CS_ETM_OPERATION_PRINT :
3296                                                      CS_ETM_OPERATION_DECODE))
3297                 goto out_free;
3298
3299         etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3300                                             t_params);
3301
3302         if (!etmq->decoder)
3303                 goto out_free;
3304
3305         /*
3306          * Register a function to handle all memory accesses required by
3307          * the trace decoder library.
3308          */
3309         if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3310                                               0x0L, ((u64) -1L),
3311                                               cs_etm__mem_access))
3312                 goto out_free_decoder;
3313
3314         zfree(&t_params);
3315         return 0;
3316
3317 out_free_decoder:
3318         cs_etm_decoder__free(etmq->decoder);
3319 out_free:
3320         zfree(&t_params);
3321         return -EINVAL;
3322 }
3323
3324 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3325 {
3326         struct auxtrace_queues *queues = &etm->queues;
3327
3328         for (unsigned int i = 0; i < queues->nr_queues; i++) {
3329                 bool empty = list_empty(&queues->queue_array[i].head);
3330                 struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3331                 int ret;
3332
3333                 /*
3334                  * Don't create decoders for empty queues, mainly because
3335                  * etmq->format is unknown for empty queues.
3336                  */
3337                 assert(empty || etmq->format != UNSET);
3338                 if (empty)
3339                         continue;
3340
3341                 ret = cs_etm__create_queue_decoders(etmq);
3342                 if (ret)
3343                         return ret;
3344         }
3345         return 0;
3346 }
3347
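/*
 * Overview of the setup sequence implemented below: parse the per-cpu
 * metadata blocks -> initialize and set up the auxtrace queues ->
 * synthesize sample event types -> split buffers on AUX record bounds ->
 * map trace IDs to metadata (from HW_ID packets when present, otherwise
 * directly from the metadata) -> create the per-queue decoders.
 */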
3348 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3349                                        struct perf_session *session)
3350 {
3351         struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3352         struct cs_etm_auxtrace *etm = NULL;
3353         struct perf_record_time_conv *tc = &session->time_conv;
3354         int event_header_size = sizeof(struct perf_event_header);
3355         int total_size = auxtrace_info->header.size;
3356         int priv_size = 0;
3357         int num_cpu, max_cpu = 0;
3358         int err = 0;
3359         int aux_hw_id_found;
3360         int i;
3361         u64 *ptr = NULL;
3362         u64 **metadata = NULL;
3363
3364         /* First the global part */
3365         ptr = (u64 *) auxtrace_info->priv;
3366         num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3367         metadata = zalloc(sizeof(*metadata) * num_cpu);
3368         if (!metadata)
3369                 return -ENOMEM;
3370
3371         /* Start parsing after the common part of the header */
3372         i = CS_HEADER_VERSION_MAX;
3373
3374         /*
3375          * The metadata is stored in the auxtrace_info section and encodes
3376          * the configuration of the ARM embedded trace macrocell which is
3377          * required by the trace decoder to properly decode the trace due
3378          * to its highly compressed nature.
3379          */
3380         for (int j = 0; j < num_cpu; j++) {
3381                 if (ptr[i] == __perf_cs_etmv3_magic) {
3382                         metadata[j] =
3383                                 cs_etm__create_meta_blk(ptr, &i,
3384                                                         CS_ETM_PRIV_MAX,
3385                                                         CS_ETM_NR_TRC_PARAMS_V0);
3386                 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3387                         metadata[j] =
3388                                 cs_etm__create_meta_blk(ptr, &i,
3389                                                         CS_ETMV4_PRIV_MAX,
3390                                                         CS_ETMV4_NR_TRC_PARAMS_V0);
3391                 } else if (ptr[i] == __perf_cs_ete_magic) {
3392                         metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3393                 } else {
3394                         ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3395                                   ptr[i]);
3396                         err = -EINVAL;
3397                         goto err_free_metadata;
3398                 }
3399
3400                 if (!metadata[j]) {
3401                         err = -ENOMEM;
3402                         goto err_free_metadata;
3403                 }
3404
3405                 if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3406                         max_cpu = metadata[j][CS_ETM_CPU];
3407         }
3408
3409         /*
3410          * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3411          * CS_ETMV4_PRIV_MAX marks how many double words (u64s, 8 bytes
3412          * each) are in the global metadata and each cpu's metadata
3413          * respectively. The following tests whether the correct number
3414          * of double words was present in the auxtrace info section.
3415          */
3416         priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3417         if (i * 8 != priv_size) {
3418                 err = -EINVAL;
3419                 goto err_free_metadata;
3420         }
3421
3422         etm = zalloc(sizeof(*etm));
3423
3424         if (!etm) {
3425                 err = -ENOMEM;
3426                 goto err_free_metadata;
3427         }
3428
3429         /*
3430          * As all the ETMs run at the same exception level, the system should
3431          * have the same PID format crossing CPUs.  So cache the PID format
3432          * and reuse it for sequential decoding.
3433          */
3434         etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3435
3436         err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
3437         if (err)
3438                 goto err_free_etm;
3439
3440         for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
3441                 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
3442                 if (err)
3443                         goto err_free_queues;
3444         }
3445
3446         if (session->itrace_synth_opts->set) {
3447                 etm->synth_opts = *session->itrace_synth_opts;
3448         } else {
3449                 itrace_synth_opts__set_default(&etm->synth_opts,
3450                                 session->itrace_synth_opts->default_no_sample);
3451                 etm->synth_opts.callchain = false;
3452         }
3453
3454         etm->session = session;
3455
3456         etm->num_cpu = num_cpu;
3457         etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3458         etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3459         etm->metadata = metadata;
3460         etm->auxtrace_type = auxtrace_info->type;
3461
3462         if (etm->synth_opts.use_timestamp)
3463                 /*
3464                  * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF
3465                  * feature, so the decoder cannot know whether the traced
3466                  * timestamp is the same as the kernel time.
3467                  *
3468                  * A user with knowledge of the platform can specify the
3469                  * itrace option 'T' to tell the decoder to forcibly use
3470                  * the traced timestamp as the kernel time.
3471                  */
3472                 etm->has_virtual_ts = true;
3473         else
3474                 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3475                 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3476
3477         if (!etm->has_virtual_ts)
3478                 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3479                             "The time field of the samples will not be set accurately.\n"
3480                             "For Arm CPUs prior to Armv8.4 or without support for FEAT_TRF,\n"
3481                             "you can specify the itrace option 'T' for timestamp decoding\n"
3482                             "if the CoreSight timestamp on the platform is the same as the kernel time.\n\n");
3483
3484         etm->auxtrace.process_event = cs_etm__process_event;
3485         etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3486         etm->auxtrace.flush_events = cs_etm__flush_events;
3487         etm->auxtrace.free_events = cs_etm__free_events;
3488         etm->auxtrace.free = cs_etm__free;
3489         etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3490         session->auxtrace = &etm->auxtrace;
3491
3492         err = cs_etm__setup_timeless_decoding(etm);
3493         if (err)
3494                 return err;
3495
3496         etm->tc.time_shift = tc->time_shift;
3497         etm->tc.time_mult = tc->time_mult;
3498         etm->tc.time_zero = tc->time_zero;
3499         if (event_contains(*tc, time_cycles)) {
3500                 etm->tc.time_cycles = tc->time_cycles;
3501                 etm->tc.time_mask = tc->time_mask;
3502                 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3503                 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3504         }
3505         err = cs_etm__synth_events(etm, session);
3506         if (err)
3507                 goto err_free_queues;
3508
3509         err = cs_etm__queue_aux_records(session);
3510         if (err)
3511                 goto err_free_queues;
3512
3513         /*
3514          * Map Trace ID values to CPU metadata.
3515          *
3516          * Trace metadata will always contain Trace ID values from the legacy algorithm
3517          * in case it's read by a version of Perf that doesn't know about HW_ID packets
3518          * or the kernel doesn't emit them.
3519          *
3520          * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3521          * the same IDs as the old algorithm as far as is possible, unless there are clashes
3522          * in which case a different value will be used. This means an older perf may still
3523          * be able to record and read files generated on a newer system.
3524          *
3525          * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3526          * those packets. If they are there then the values will be mapped and plugged into
3527          * the metadata and decoders are only created for each mapping received.
3528          *
3529          * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3530          * then we map Trace ID values to CPU directly from the metadata and create decoders
3531          * for all mappings.
3532          */
3533
3534         /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3535         aux_hw_id_found = 0;
3536         err = perf_session__peek_events(session, session->header.data_offset,
3537                                         session->header.data_size,
3538                                         cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3539         if (err)
3540                 goto err_free_queues;
3541
3542         /* if no HW ID found this is a file with metadata values only, map from metadata */
3543         if (!aux_hw_id_found) {
3544                 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3545                 if (err)
3546                         goto err_free_queues;
3547         }
3548
3549         err = cs_etm__create_decoders(etm);
3550         if (err)
3551                 goto err_free_queues;
3552
3553         etm->data_queued = etm->queues.populated;
3554         return 0;
3555
3556 err_free_queues:
3557         auxtrace_queues__free(&etm->queues);
3558         session->auxtrace = NULL;
3559 err_free_etm:
3560         zfree(&etm);
3561 err_free_metadata:
3562         /* No need to check @metadata[j], free(NULL) is supported */
3563         for (int j = 0; j < num_cpu; j++)
3564                 zfree(&metadata[j]);
3565         zfree(&metadata);
3566         return err;
3567 }