The new sample type, PERF_SAMPLE_WEIGHT_STRUCT, is an alternative to the
PERF_SAMPLE_WEIGHT sample type. Users can apply either the
PERF_SAMPLE_WEIGHT sample type or the PERF_SAMPLE_WEIGHT_STRUCT sample
type to retrieve the sample weight, but they cannot apply both sample
types simultaneously.
The new sample type shares the same space as the PERF_SAMPLE_WEIGHT
sample type. The lower 32 bits are exactly the same for both sample
types. The higher 32 bits may be different for different architectures.
Add arch specific arch_evsel__set_sample_weight() to set the new sample
type for X86. Only store the lower 32 bits in sample->weight if the
new sample type is applied. In practice, no memory access could last
longer than 4G cycles, so no data will be lost.
If the kernel doesn't support the new sample type, fall back to the
PERF_SAMPLE_WEIGHT sample type.
There is no impact for other architectures.
Committer notes:
Fixup related to PERF_SAMPLE_CODE_PAGE_SIZE, present in acme/perf/core
but not upstream yet.
Signed-off-by: Kan Liang <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jin Yao <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Madhavan Srinivasan <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
perf-y += event.o
perf-y += evlist.o
perf-y += mem-events.o
+perf-y += evsel.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
return found_term;
}
+void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT);
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
}
if (opts->sample_weight)
- evsel__set_sample_bit(evsel, WEIGHT);
+ arch_evsel__set_sample_weight(evsel);
attr->task = track;
attr->mmap = track;
}
fallback_missing_features:
+ if (perf_missing_features.weight_struct) {
+ evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
+ }
if (perf_missing_features.clockid_wrong)
evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
if (perf_missing_features.clockid) {
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.code_page_size &&
+ if (!perf_missing_features.weight_struct &&
+ (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
+ perf_missing_features.weight_struct = true;
+ pr_debug2("switching off weight struct support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.code_page_size &&
(evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
perf_missing_features.code_page_size = true;
pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
}
}
- if (type & PERF_SAMPLE_WEIGHT) {
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+ union perf_sample_weight weight;
+
OVERFLOW_CHECK_u64(array);
- data->weight = *array;
+ weight.full = *array;
+ if (type & PERF_SAMPLE_WEIGHT)
+ data->weight = weight.full;
+ else
+ data->weight = weight.var1_dw;
array++;
}
bool cgroup;
bool data_page_size;
bool code_page_size;
+ bool weight_struct;
};
extern struct perf_missing_features perf_missing_features;
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+void arch_evsel__set_sample_weight(struct evsel *evsel);
+
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
sample.addr = items->mem_access_address;
- if (sample_type & PERF_SAMPLE_WEIGHT) {
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
/*
* Refer kernel's setup_pebs_adaptive_sample_data() and
* intel_hsw_weight().
*/
- if (items->has_mem_access_latency)
- sample.weight = items->mem_access_latency;
+ if (items->has_mem_access_latency) {
+ u64 weight = items->mem_access_latency >> 32;
+
+ /*
+ * Starting from SPR, the mem access latency field
+ * contains both cache latency [47:32] and instruction
+ * latency [15:0]. The cache latency is the same as the
+ * mem access latency on previous platforms.
+ *
+ * In practice, no memory access could last longer than 4G
+ * cycles. Use latency >> 32 to distinguish the
+ * different format of the mem access latency field.
+ */
+ if (weight > 0)
+ sample.weight = weight & 0xffff;
+ else
+ sample.weight = items->mem_access_latency;
+ }
if (!sample.weight && items->has_tsx_aux_info) {
/* Cycles last block */
sample.weight = (u32)items->tsx_aux_info;
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE),
+ bit_name(WEIGHT_STRUCT),
{ .name = NULL, }
};
#undef bit_name
if (sample_type & PERF_SAMPLE_STACK_USER)
stack_user__printf(&sample->user_stack);
- if (sample_type & PERF_SAMPLE_WEIGHT)
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
printf("... weight: %" PRIu64 "\n", sample->weight);
if (sample_type & PERF_SAMPLE_DATA_SRC)
}
}
- if (type & PERF_SAMPLE_WEIGHT)
+ if (type & PERF_SAMPLE_WEIGHT_TYPE)
result += sizeof(u64);
if (type & PERF_SAMPLE_DATA_SRC)
}
}
- if (type & PERF_SAMPLE_WEIGHT) {
+ if (type & PERF_SAMPLE_WEIGHT_TYPE) {
*array = sample->weight;
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+ *array &= 0xffffffff;
array++;
}