int ret = 0;
struct kprobe *prev;
struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
- struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1));
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
} else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
ret = -EINVAL;
- } else if (ppc_inst_prefixed(prefix)) {
+ } else if ((unsigned long)p->addr & ~PAGE_MASK &&
+ ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
return 0;
}
+#ifdef CONFIG_MMU
void *alloc_insn_page(void)
{
return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
__builtin_return_address(0));
}
+#endif
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
/*
* In case the user-specified fault handler returned
* zero, try to fix up.
if (!atomic_inc_not_zero(&pmc_refcount)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&pmc_refcount) == 0) {
- if (!reserve_pmc_hardware())
+ if (!reserve_pmc_hardware()) {
err = -EBUSY;
- else
+ } else {
reserve_ds_buffers();
+ reserve_lbr_buffers();
+ }
}
if (!err)
atomic_inc(&pmc_refcount);
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto do_del;
+ __set_bit(event->hw.idx, cpuc->dirty);
+
/*
* Not a TXN, therefore cleanup properly.
*/
return err;
}
+ /*
+  * perf_clear_dirty_counters - zero the counters still marked dirty on this CPU
+  *
+  * Removes the currently assigned counters from cpuc->dirty, writes 0 to the
+  * counter MSR of every index that is still set (skipping metric indices and
+  * the fixed VLBR index), and finally empties the dirty bitmap.
+  */
+ void perf_clear_dirty_counters(void)
+ {
+ 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ 	int idx;
+
+ 	/* Counters that are currently assigned must not be cleared. */
+ 	for (idx = 0; idx < cpuc->n_events; idx++)
+ 		__clear_bit(cpuc->assign[idx], cpuc->dirty);
+
+ 	/* Nothing left in the dirty mask: no MSR writes are required. */
+ 	if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
+ 		return;
+
+ 	for_each_set_bit(idx, cpuc->dirty, X86_PMC_IDX_MAX) {
+ 		/* Metrics and fake events don't have corresponding HW counters. */
+ 		if (is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_VLBR)
+ 			continue;
+
+ 		if (idx < INTEL_PMC_IDX_FIXED)
+ 			wrmsrl(x86_pmu_event_addr(idx), 0);
+ 		else
+ 			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (idx - INTEL_PMC_IDX_FIXED), 0);
+ 	}
+
+ 	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+ }
+
static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
-
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
EVENT_EXTRA_END
};
* The :ppp indicates the Precise Distribution (PDist) facility, which
* is only supported on the GP counter 0. If a :ppp event which is not
* available on the GP counter 0, error out.
+ * Exception: Instruction PDIR is only available on the fixed counter 0.
*/
- if (event->attr.precise_ip == 3) {
+ if ((event->attr.precise_ip == 3) &&
+ !constraint_match(&fixed0_constraint, event->hw.config)) {
if (c->idxmsk64 & BIT_ULL(0))
return &counter0_constraint;
tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(pmem);
- if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+ /*
+ * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default.
+ * TSX force abort hooks are not required on these systems. Only deploy
+ * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT.
+ */
+ if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) &&
+ !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
x86_pmu.flags |= PMU_FL_TFA;
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
pmu->name = "cpu_core";
pmu->cpu_type = hybrid_big;
- pmu->num_counters = x86_pmu.num_counters + 2;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+ pmu->num_counters = x86_pmu.num_counters + 2;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+ } else {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
* Check all LBT MSR here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
- if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
x86_pmu.lbr_nr = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
#define SKX_M2M_PCI_PMON_CTR0 0x200
#define SKX_M2M_PCI_PMON_BOX_CTL 0x258
+ /* Memory Map registers device ID */
+ #define SNR_ICX_MESH2IIO_MMAP_DID 0x9a2
+ #define SNR_ICX_SAD_CONTROL_CFG 0x3f4
+
+ /* Get the I/O stack ID in SAD_CONTROL_CFG notation */
+ #define SAD_CONTROL_STACK_ID(data) (((data) >> 4) & 0x7)
+
/* SNR Ubox */
#define SNR_U_MSR_PMON_CTR0 0x1f98
#define SNR_U_MSR_PMON_CTL0 0x1f91
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
+ if (die_id < 0)
+ die_id = -ENODEV;
map->pbus_to_dieid[bus] = die_id;
break;
}
i = -1;
if (reverse) {
for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
}
} else {
for (bus = 0; bus <= 255; bus++) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
}
static umode_t
- skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+ pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr,
+ int die, int zero_bus_pmu)
{
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
- /* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
- return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode;
+ return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode;
+ }
+
+ static umode_t
+ skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+ {
+ /* Root bus 0x00 is valid only for pmu_idx = 0. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 0);
}
static ssize_t skx_iio_mapping_show(struct device *dev,
NULL,
};
- static int skx_iio_set_mapping(struct intel_uncore_type *type)
+ static int
+ pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
{
char buf[64];
int ret;
struct attribute **attrs = NULL;
struct dev_ext_attribute *eas = NULL;
- ret = skx_iio_get_topology(type);
+ ret = type->get_topology(type);
if (ret < 0)
goto clear_attr_update;
eas[die].var = (void *)die;
attrs[die] = &eas[die].attr.attr;
}
- skx_iio_mapping_group.attrs = attrs;
+ ag->attrs = attrs;
return 0;
err:
return ret;
}
+ static int skx_iio_set_mapping(struct intel_uncore_type *type)
+ {
+ return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+ }
+
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
{
struct attribute **attr = skx_iio_mapping_group.attrs;
.ops = &skx_uncore_iio_ops,
.format_group = &skx_uncore_iio_format_group,
.attr_update = skx_iio_attr_update,
+ .get_topology = skx_iio_get_topology,
.set_mapping = skx_iio_set_mapping,
.cleanup_mapping = skx_iio_cleanup_mapping,
};
.attrs = snr_uncore_iio_formats_attr,
};
+ static umode_t
+ snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+ {
+ /* Root bus 0x00 is valid only for pmu_idx = 1. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 1);
+ }
+
+ static struct attribute_group snr_iio_mapping_group = {
+ .is_visible = snr_iio_mapping_visible,
+ };
+
+ static const struct attribute_group *snr_iio_attr_update[] = {
+ &snr_iio_mapping_group,
+ NULL,
+ };
+
+ /*
+  * sad_cfg_iio_topology - build the per-die IIO stack -> root bus table
+  * @type:             uncore type whose ->topology array is allocated and filled
+  * @sad_pmon_mapping: table translating a stack ID in SAD_CONTROL_CFG notation
+  *                    into the corresponding PMON stack ID
+  *
+  * Walks every Intel device with ID SNR_ICX_MESH2IIO_MMAP_DID, reads its
+  * SAD_CONTROL_CFG register and records the device's bus number and PCI
+  * domain for the die the bus belongs to.
+  *
+  * Return: 0 on success; -ENOMEM if the topology array cannot be allocated;
+  * -EPERM if no matching device is found, the die cannot be resolved, or the
+  * stack ID is not below type->num_boxes; otherwise the errno derived from a
+  * failed config-space read. On any error type->topology is freed and set
+  * to NULL.
+  */
+ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping)
+ {
+ 	u32 sad_cfg;
+ 	int die, stack_id, ret = -EPERM;
+ 	struct pci_dev *dev = NULL;
+
+ 	type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
+ 				 GFP_KERNEL);
+ 	if (!type->topology)
+ 		return -ENOMEM;
+
+ 	while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) {
+ 		ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg);
+ 		if (ret) {
+ 			ret = pcibios_err_to_errno(ret);
+ 			break;
+ 		}
+
+ 		die = uncore_pcibus_to_dieid(dev->bus);
+ 		stack_id = SAD_CONTROL_STACK_ID(sad_cfg);
+ 		if (die < 0 || stack_id >= type->num_boxes) {
+ 			ret = -EPERM;
+ 			break;
+ 		}
+
+ 		/* Convert stack id from SAD_CONTROL to PMON notation. */
+ 		stack_id = sad_pmon_mapping[stack_id];
+
+ 		((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number;
+ 		type->topology[die].segment = pci_domain_nr(dev->bus);
+ 	}
+
+ 	/*
+ 	 * Leaving the loop through 'break' keeps the reference that
+ 	 * pci_get_device() took on @dev; drop it here. After a complete
+ 	 * walk @dev is NULL and pci_dev_put() does nothing.
+ 	 */
+ 	pci_dev_put(dev);
+
+ 	if (ret) {
+ 		kfree(type->topology);
+ 		type->topology = NULL;
+ 	}
+
+ 	return ret;
+ }
+
+ /*
+ * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+ enum {
+ SNR_QAT_PMON_ID,
+ SNR_CBDMA_DMI_PMON_ID,
+ SNR_NIS_PMON_ID,
+ SNR_DLB_PMON_ID,
+ SNR_PCIE_GEN3_PMON_ID
+ };
+
+ static u8 snr_sad_pmon_mapping[] = {
+ SNR_CBDMA_DMI_PMON_ID,
+ SNR_PCIE_GEN3_PMON_ID,
+ SNR_DLB_PMON_ID,
+ SNR_NIS_PMON_ID,
+ SNR_QAT_PMON_ID
+ };
+
+ static int snr_iio_get_topology(struct intel_uncore_type *type)
+ {
+ return sad_cfg_iio_topology(type, snr_sad_pmon_mapping);
+ }
+
+ static int snr_iio_set_mapping(struct intel_uncore_type *type)
+ {
+ return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+ }
+
static struct intel_uncore_type snr_uncore_iio = {
.name = "iio",
.num_counters = 4,
.msr_offset = SNR_IIO_MSR_OFFSET,
.ops = &ivbep_uncore_msr_ops,
.format_group = &snr_uncore_iio_format_group,
+ .attr_update = snr_iio_attr_update,
+ .get_topology = snr_iio_get_topology,
+ .set_mapping = snr_iio_set_mapping,
+ .cleanup_mapping = skx_iio_cleanup_mapping,
};
static struct intel_uncore_type snr_uncore_irp = {
EVENT_CONSTRAINT_END
};
+ static umode_t
+ icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+ {
+ /* Root bus 0x00 is valid only for pmu_idx = 5. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 5);
+ }
+
+ static struct attribute_group icx_iio_mapping_group = {
+ .is_visible = icx_iio_mapping_visible,
+ };
+
+ static const struct attribute_group *icx_iio_attr_update[] = {
+ &icx_iio_mapping_group,
+ NULL,
+ };
+
+ /*
+ * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+ enum {
+ ICX_PCIE1_PMON_ID,
+ ICX_PCIE2_PMON_ID,
+ ICX_PCIE3_PMON_ID,
+ ICX_PCIE4_PMON_ID,
+ ICX_PCIE5_PMON_ID,
+ ICX_CBDMA_DMI_PMON_ID
+ };
+
+ static u8 icx_sad_pmon_mapping[] = {
+ ICX_CBDMA_DMI_PMON_ID,
+ ICX_PCIE1_PMON_ID,
+ ICX_PCIE2_PMON_ID,
+ ICX_PCIE3_PMON_ID,
+ ICX_PCIE4_PMON_ID,
+ ICX_PCIE5_PMON_ID,
+ };
+
+ static int icx_iio_get_topology(struct intel_uncore_type *type)
+ {
+ return sad_cfg_iio_topology(type, icx_sad_pmon_mapping);
+ }
+
+ static int icx_iio_set_mapping(struct intel_uncore_type *type)
+ {
+ return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+ }
+
static struct intel_uncore_type icx_uncore_iio = {
.name = "iio",
.num_counters = 4,
.constraints = icx_uncore_iio_constraints,
.ops = &skx_uncore_iio_ops,
.format_group = &snr_uncore_iio_format_group,
+ .attr_update = icx_iio_attr_update,
+ .get_topology = icx_iio_get_topology,
+ .set_mapping = icx_iio_set_mapping,
+ .cleanup_mapping = skx_iio_cleanup_mapping,
};
static struct intel_uncore_type icx_uncore_irp = {
.perf_ctr = SNR_M2M_PCI_PMON_CTR0,
.event_ctl = SNR_M2M_PCI_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
.box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
.ops = &snr_m2m_uncore_pci_ops,
- .format_group = &skx_uncore_format_group,
+ .format_group = &snr_m2m_uncore_format_group,
};
static struct attribute *icx_upi_uncore_formats_attr[] = {
*/
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events; /* the # of events in the below arrays */
void release_lbr_buffers(void);
+void reserve_lbr_buffers(void);
+
extern struct event_constraint bts_constraint;
extern struct event_constraint vlbr_constraint;
{
}
+static inline void reserve_lbr_buffers(void)
+{
+}
+
static inline int intel_pmu_init(void)
{
return 0;
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
-
- force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+ else
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
local_irq_disable();
}
return;
/* kprobes don't want to hook the spurious faults: */
- if (kprobe_page_fault(regs, X86_TRAP_PF))
+ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
return;
/*
}
/* kprobes don't want to hook the spurious faults: */
- if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
+ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
return;
/*
/**
* cpu_function_call - call a function on the cpu
+ * @cpu: target cpu to queue this function
* @func: the function to be called
* @info: the function call argument
*
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
- struct pmu *pmu = ctx->pmu;
+ struct pmu *pmu;
cpuctx = __get_cpu_context(ctx);
+
+ /*
+ * HACK: for HETEROGENEOUS the task context might have switched to a
+ * different PMU; force (re)set the context.
+ */
+ pmu = ctx->pmu = cpuctx->ctx.pmu;
+
if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
__perf_pmu_sched_task(cpuctx, true);
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
++ctx->pin_count;
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
return ctx;
}
static void perf_sigtrap(struct perf_event *event)
{
- struct kernel_siginfo info;
-
/*
* We'd expect this to only occur if the irq_work is delayed and either
* ctx->task or current has changed in the meantime. This can be the
if (current->flags & PF_EXITING)
return;
- clear_siginfo(&info);
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_PERF;
- info.si_errno = event->attr.type;
- info.si_perf = event->attr.sig_data;
- info.si_addr = (void __user *)event->pending_addr;
- force_sig_info(&info);
+ force_sig_perf((void __user *)event->pending_addr,
+ event->attr.type, event->attr.sig_data);
}
static void perf_pending_event_disable(struct perf_event *event)
return data->aux_size;
}
- long perf_pmu_snapshot_aux(struct perf_buffer *rb,
- struct perf_event *event,
- struct perf_output_handle *handle,
- unsigned long size)
+ static long perf_pmu_snapshot_aux(struct perf_buffer *rb,
+ struct perf_event *event,
+ struct perf_output_handle *handle,
+ unsigned long size)
{
unsigned long flags;
long ret;
* @pid: target pid
* @cpu: target cpu
* @group_fd: group leader event fd
+ * @flags: perf event open flags
*/
SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
* @attr: attributes of the counter to create
* @cpu: cpu in which the counter is bound
* @task: task to profile (NULL for percpu)
+ * @overflow_handler: callback to trigger when we hit the event
+ * @context: context data could be used in overflow_handler callback
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,