extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+ extern int numa_update_cpu_topology(bool cpus_locked);
+static inline int early_cpu_to_node(int cpu)
+{
+ int nid;
+
+ nid = numa_cpu_lookup_table[cpu];
+
+ /*
+ * Fall back to node 0 if nid is unset (it should be, except bugs).
+ * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+ */
+ return (nid < 0) ? 0 : nid;
+}
#else
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
static inline void dump_numa_cpu_topology(void) {}
static inline int sysfs_add_device_to_node(struct device *dev, int nid)
int nid)
{
}
+
+ static inline int numa_update_cpu_topology(bool cpus_locked)
+ {
+ return 0;
+ }
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
static int subcore_init(void)
{
- if (!cpu_has_feature(CPU_FTR_SUBCORE))
+ unsigned pvr_ver;
+
+ pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+ if (pvr_ver != PVR_POWER8 &&
+ pvr_ver != PVR_POWER8E &&
+ pvr_ver != PVR_POWER8NVL)
return 0;
/*
return ret;
}
+static struct attribute_group x86_pmu_attr_group;
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
x86_pmu_events_group.attrs = tmp;
}
+ if (x86_pmu.attrs) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
+ if (!WARN_ON(!tmp))
+ x86_pmu_attr_group.attrs = tmp;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
static void refresh_pce(void *ignored)
{
- if (current->active_mm)
- load_mm_cr4(current->active_mm);
+ load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
static void x86_pmu_event_mapped(struct perf_event *event)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
- EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
- struct cyc2ns_data *data;
+ struct cyc2ns_data data;
u64 offset;
userpg->cap_user_time = 0;
if (!using_native_sched_clock() || !sched_clock_stable())
return;
- data = cyc2ns_read_begin();
+ cyc2ns_read_begin(&data);
- offset = data->cyc2ns_offset + __sched_clock_offset;
+ offset = data.cyc2ns_offset + __sched_clock_offset;
/*
* Internal timekeeping for enabled/running/stopped times
* is always in the local_clock domain.
*/
userpg->cap_user_time = 1;
- userpg->time_mult = data->cyc2ns_mul;
- userpg->time_shift = data->cyc2ns_shift;
+ userpg->time_mult = data.cyc2ns_mul;
+ userpg->time_shift = data.cyc2ns_shift;
userpg->time_offset = offset - now;
/*
userpg->time_zero = offset;
}
- cyc2ns_read_end(data);
+ cyc2ns_read_end();
}
void
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
- if (!ldt || idx > ldt->size)
+ if (!ldt || idx > ldt->nr_entries)
return 0;
desc = &ldt->entries[idx];
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
return -ENOMEM;
}
+static void flip_smm_bit(void *data)
+{
+ unsigned long set = *(unsigned long *)data;
+
+ if (set > 0) {
+ msr_set_bit(MSR_IA32_DEBUGCTLMSR,
+ DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+ } else {
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
+ DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+ }
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
cpuc->lbr_sel = NULL;
+ flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+
if (!cpuc->shared_regs)
return;
int pebs_broken = 0;
int cpu;
- get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
- put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
+ cpus_read_lock();
intel_snb_check_microcode();
+ cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
NULL
};
+static ssize_t freeze_on_smi_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
+}
+
+static DEFINE_MUTEX(freeze_on_smi_mutex);
+
+static ssize_t freeze_on_smi_store(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ ssize_t ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val > 1)
+ return -EINVAL;
+
+ mutex_lock(&freeze_on_smi_mutex);
+
+ if (x86_pmu.attr_freeze_on_smi == val)
+ goto done;
+
+ x86_pmu.attr_freeze_on_smi = val;
+
+ get_online_cpus();
+ on_each_cpu(flip_smm_bit, &val, 1);
+ put_online_cpus();
+done:
+ mutex_unlock(&freeze_on_smi_mutex);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(freeze_on_smi);
+
+static struct attribute *intel_pmu_attrs[] = {
+ &dev_attr_freeze_on_smi.attr,
+ NULL,
+};
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
+ x86_pmu.attrs = intel_pmu_attrs;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
lockup_detector_resume();
- get_online_cpus();
+ cpus_read_lock();
- for_each_online_cpu(c) {
+ for_each_online_cpu(c)
free_excl_cntrs(c);
- }
- put_online_cpus();
+ cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
up_write(&policy->rwsem);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
- get_online_cpus();
+ cpus_read_lock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
if (!(cpufreq_driver->flags & CPUFREQ_STICKY) &&
list_empty(&cpufreq_policy_list)) {
/* if all ->init() calls failed, unregister */
+ ret = -ENODEV;
pr_debug("%s: No CPU initialized for driver %s\n", __func__,
driver_data->name);
goto err_if_unreg;
}
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
- cpuhp_cpufreq_online,
- cpuhp_cpufreq_offline);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "cpufreq:online",
+ cpuhp_cpufreq_online,
+ cpuhp_cpufreq_offline);
if (ret < 0)
goto err_if_unreg;
hp_online = ret;
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
pr_debug("unregistering driver %s\n", driver->name);
/* Protect against concurrent cpu hotplug */
- get_online_cpus();
+ cpus_read_lock();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
- cpuhp_remove_state_nocalls(hp_online);
+ cpuhp_remove_state_nocalls_cpuslocked(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
CPUHP_XEN_EVTCHN_PREPARE,
CPUHP_ARM_SHMOBILE_SCU_PREPARE,
CPUHP_SH_SH3X_PREPARE,
- CPUHP_BLK_MQ_PREPARE,
CPUHP_NET_FLOW_PREPARE,
CPUHP_TOPOLOGY_PREPARE,
CPUHP_NET_IUCV_PREPARE,
CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_PERF_ONLINE,
CPUHP_AP_PERF_X86_ONLINE,
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu), bool multi_instance);
+ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
+ bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
* @state: The state for which the calls are installed
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+ {
+ return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
+ teardown, false);
+ }
+
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
* callbacks
false);
}
+ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+ {
+ return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
+ teardown, false);
+ }
+
/**
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke);
+ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke);
/**
* cpuhp_state_add_instance - Add an instance for a state and invoke startup
return __cpuhp_state_add_instance(state, node, false);
}
+ static inline int
+ cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node)
+ {
+ return __cpuhp_state_add_instance_cpuslocked(state, node, false);
+ }
+
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
/**
* cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
__cpuhp_remove_state(state, false);
}
+ static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
+ {
+ __cpuhp_remove_state_cpuslocked(state, false);
+ }
+
/**
* cpuhp_remove_multi_state - Remove hotplug multi state callback
* @state: The state for which the calls are removed
PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
/* Do not use FLR even if device advertises PCI_AF_CAP */
PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+ /*
+ * Resume before calling the driver's system suspend hooks, disabling
+ * the direct_complete optimization.
+ */
+ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
};
enum pci_irq_reroute_variant {
unsigned int irq_managed:1;
unsigned int has_secondary_link:1;
unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
+ unsigned int is_probed:1; /* device probing in progress */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
unsigned int max_vecs, unsigned int flags,
const struct irq_affinity *aff_desc)
{
- if (min_vecs > 1)
- return -EINVAL;
- return 1;
+ if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+ return 1;
+ return -ENOSPC;
}
static inline void pci_free_irq_vectors(struct pci_dev *dev)
u64 dl_runtime; /* Maximum runtime for each instance */
u64 dl_deadline; /* Relative deadline of each instance */
u64 dl_period; /* Separation of two instances (period) */
- u64 dl_bw; /* dl_runtime / dl_deadline */
+ u64 dl_bw; /* dl_runtime / dl_period */
+ u64 dl_density; /* dl_runtime / dl_deadline */
/*
* Actual scheduling parameters. Initialized with the values above,
*
* @dl_yielded tells if task gave up the CPU before consuming
* all its available runtime during the last job.
+ *
+ * @dl_non_contending tells if the task is inactive while still
+ * contributing to the active utilization. In other words, it
+ * indicates if the inactive timer has been armed and its handler
+ * has not been executed yet. This flag is useful to avoid race
+ * conditions between the inactive timer handler and the wakeup
+ * code.
*/
int dl_throttled;
int dl_boosted;
int dl_yielded;
+ int dl_non_contending;
/*
* Bandwidth enforcement timer. Each -deadline task has its
* own bandwidth to be enforced, thus we need one timer per task.
*/
struct hrtimer dl_timer;
+
+ /*
+ * Inactive timer, responsible for decreasing the active utilization
+ * at the "0-lag time". When a -deadline task blocks, it contributes
+ * to GRUB's active utilization until the "0-lag time", hence a
+ * timer is needed to decrease the active utilization at the correct
+ * time.
+ */
+ struct hrtimer inactive_timer;
};
union rcu_special {
* current.
* task_xid_nr_ns() : id seen from the ns specified;
*
- * set_task_vxid() : assigns a virtual id to a task;
- *
* see also pid_nr() etc in include/linux/pid.h
*/
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
+ static inline bool is_percpu_thread(void)
+ {
+ #ifdef CONFIG_SMP
+ return (current->flags & PF_NO_SETAFFINITY) &&
+ (current->nr_cpus_allowed == 1);
+ #else
+ return true;
+ #endif
+ }
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+ #include <linux/percpu-rwsem.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+ #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+ static struct lock_class_key cpuhp_state_key;
+ static struct lockdep_map cpuhp_state_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+ #endif
+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
mutex_unlock(&cpu_add_remove_lock);
}
- /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ /*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
- static struct {
- struct task_struct *active_writer;
- /* wait queue to wake up the active_writer */
- wait_queue_head_t wq;
- /* verifies that no writer will get active while readers are active */
- struct mutex lock;
- /*
- * Also blocks the new readers during
- * an ongoing cpu hotplug operation.
- */
- atomic_t refcount;
-
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
- #endif
- } cpu_hotplug = {
- .active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
- .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
- #endif
- };
-
- /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
- #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
- #define cpuhp_lock_acquire_tryread() \
- lock_map_acquire_tryread(&cpu_hotplug.dep_map)
- #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
- #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
+ DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
-
- void get_online_cpus(void)
+ void cpus_read_lock(void)
{
- might_sleep();
- if (cpu_hotplug.active_writer == current)
- return;
- cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
- atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ percpu_down_read(&cpu_hotplug_lock);
}
- EXPORT_SYMBOL_GPL(get_online_cpus);
+ EXPORT_SYMBOL_GPL(cpus_read_lock);
- void put_online_cpus(void)
+ void cpus_read_unlock(void)
{
- int refcount;
-
- if (cpu_hotplug.active_writer == current)
- return;
-
- refcount = atomic_dec_return(&cpu_hotplug.refcount);
- if (WARN_ON(refcount < 0)) /* try to fix things up */
- atomic_inc(&cpu_hotplug.refcount);
-
- if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
- wake_up(&cpu_hotplug.wq);
-
- cpuhp_lock_release();
-
+ percpu_up_read(&cpu_hotplug_lock);
}
- EXPORT_SYMBOL_GPL(put_online_cpus);
+ EXPORT_SYMBOL_GPL(cpus_read_unlock);
- /*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- * writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- * non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
- void cpu_hotplug_begin(void)
+ void cpus_write_lock(void)
{
- DEFINE_WAIT(wait);
-
- cpu_hotplug.active_writer = current;
- cpuhp_lock_acquire();
+ percpu_down_write(&cpu_hotplug_lock);
+ }
- for (;;) {
- mutex_lock(&cpu_hotplug.lock);
- prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
- if (likely(!atomic_read(&cpu_hotplug.refcount)))
- break;
- mutex_unlock(&cpu_hotplug.lock);
- schedule();
- }
- finish_wait(&cpu_hotplug.wq, &wait);
+ void cpus_write_unlock(void)
+ {
+ percpu_up_write(&cpu_hotplug_lock);
}
- void cpu_hotplug_done(void)
+ void lockdep_assert_cpus_held(void)
{
- cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
- cpuhp_lock_release();
+ percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
/*
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
- /* Notifier wrappers for transitioning to state machine */
-
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
st->should_run = false;
+ lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
+ lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
if (!cpu_online(cpu))
return 0;
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
+
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
rcu_read_unlock();
}
- static inline void check_for_tasks(int dead_cpu)
- {
- struct task_struct *g, *p;
-
- read_lock(&tasklist_lock);
- for_each_process_thread(g, p) {
- if (!p->on_rq)
- continue;
- /*
- * We do the check with unlocked task_rq(p)->lock.
- * Order the reading to do not warn about a task,
- * which was running on this cpu in the past, and
- * it's just been woken on another cpu.
- */
- rmb();
- if (task_cpu(p) != dead_cpu)
- continue;
-
- pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
- p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
- }
- read_unlock(&tasklist_lock);
- }
-
/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU refused to die */
irq_unlock_sparse();
if (!cpu_present(cpu))
return -EINVAL;
- cpu_hotplug_begin();
+ cpus_write_lock();
cpuhp_tasks_frozen = tasks_frozen;
}
out:
- cpu_hotplug_done();
+ cpus_write_unlock();
return ret;
}
struct task_struct *idle;
int ret = 0;
- cpu_hotplug_begin();
+ cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
target = min((int)target, CPUHP_BRINGUP_CPU);
ret = cpuhp_up_callbacks(cpu, st, target);
out:
- cpu_hotplug_done();
+ cpus_write_unlock();
return ret;
}
.startup.single = smpboot_unpark_threads,
.teardown.single = NULL,
},
+ [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+ .name = "irq/affinity:online",
+ .startup.single = irq_affinity_online_cpu,
+ .teardown.single = NULL,
+ },
[CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online",
.startup.single = perf_event_init_cpu,
}
}
- int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
- bool invoke)
+ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node,
+ bool invoke)
{
struct cpuhp_step *sp;
int cpu;
int ret;
+ lockdep_assert_cpus_held();
+
sp = cpuhp_get_step(state);
if (sp->multi_instance == false)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
hlist_add_head(node, &sp->list);
unlock:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ return ret;
+ }
+
+ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke)
+ {
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
* @multi_instance: State is set up for multiple instances which get
* added afterwards.
*
+ * The caller needs to hold cpus read locked while calling this function.
* Returns:
* On success:
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
* 0 for all other states
* On failure: proper (negative) error code
*/
- int __cpuhp_setup_state(enum cpuhp_state state,
- const char *name, bool invoke,
- int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu),
- bool multi_instance)
+ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
int cpu, ret = 0;
bool dynstate;
+ lockdep_assert_cpus_held();
+
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
}
out:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
return state;
return ret;
}
+ EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+ int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
+ {
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+ teardown, multi_instance);
+ cpus_read_unlock();
+ return ret;
+ }
EXPORT_SYMBOL(__cpuhp_setup_state);
int __cpuhp_state_remove_instance(enum cpuhp_state state,
if (!sp->multi_instance)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !cpuhp_get_teardown_cb(state))
remove:
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
* @state: The state to remove
* @invoke: If true, the teardown function is invoked for cpus where
* cpu state >= @state
*
+ * The caller needs to hold cpus read locked while calling this function.
* The teardown callback is currently not allowed to fail. Think
* about module removal!
*/
- void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
- get_online_cpus();
+ lockdep_assert_cpus_held();
mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ }
+ EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+ {
+ cpus_read_lock();
+ __cpuhp_remove_state_cpuslocked(state, invoke);
+ cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
mutex_unlock(&cpuhp_state_mutex);
if (ret)
- return ret;
+ goto out;
if (st->state < target)
ret = do_cpu_up(dev->id, target);
else
ret = do_cpu_down(dev->id, target);
-
+out:
unlock_device_hotplug();
return ret ? ret : count;
}
NULL
};
- static struct attribute_group cpuhp_cpu_attr_group = {
+ static const struct attribute_group cpuhp_cpu_attr_group = {
.attrs = cpuhp_cpu_attrs,
.name = "hotplug",
NULL
NULL
};
- static struct attribute_group cpuhp_cpu_root_attr_group = {
+ static const struct attribute_group cpuhp_cpu_root_attr_group = {
.attrs = cpuhp_cpu_root_attrs,
.name = "hotplug",
NULL
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
+ static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
return 0;
}
-static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
-{
- return 0;
-}
-
static inline void update_cgrp_time_from_event(struct perf_event *event)
{
}
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- /*
- * We could be clever and allow to attach a event to an
- * offline CPU and activate it when the CPU comes up, but
- * that's for later.
- */
- if (!cpu_online(cpu))
- return ERR_PTR(-ENODEV);
-
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
__output_copy(handle, values, n * sizeof(u64));
}
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
static void perf_output_read_group(struct perf_output_handle *handle,
struct perf_event *event,
u64 enabled, u64 running)
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
PERF_FORMAT_TOTAL_TIME_RUNNING)
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that its both hard and excessively expensive to iterate the
+ * child list, not to mention that its impossible to IPI the children running
+ * on another CPU, from interrupt/NMI context.
+ */
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
return __perf_event_account_interrupt(event, 1);
}
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+ /*
+ * Due to interrupt latency (AKA "skid"), we may enter the
+ * kernel before taking an overflow, even if the PMU is only
+ * counting user events.
+ * To avoid leaking information to userspace, we must always
+ * reject kernel samples when exclude_kernel is set.
+ */
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return false;
+
+ return true;
+}
+
/*
* Generic event overflow handling, sampling.
*/
ret = __perf_event_account_interrupt(event, throttle);
+ /*
+ * For security, drop the skid kernel samples if necessary.
+ */
+ if (!sample_is_allowed(event, regs))
+ return ret;
+
/*
* XXX event_limit might not quite work as expected on inherited
* events
int err = 0;
mutex_lock(&swhash->hlist_mutex);
- if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+ if (!swevent_hlist_deref(swhash) &&
+ cpumask_test_cpu(cpu, perf_online_mask)) {
struct swevent_hlist *hlist;
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
{
int err, cpu, failed_cpu;
- get_online_cpus();
+ mutex_lock(&pmus_lock);
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(cpu);
if (err) {
goto fail;
}
}
- put_online_cpus();
-
+ mutex_unlock(&pmus_lock);
return 0;
fail:
for_each_possible_cpu(cpu) {
break;
swevent_hlist_put_cpu(cpu);
}
-
- put_online_cpus();
+ mutex_unlock(&pmus_lock);
return err;
}
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpu_function_call(cpu,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
- put_online_cpus();
+ cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
return count;
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
+ cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
}
static struct pmu *perf_init_event(struct perf_event *event)
{
- struct pmu *pmu = NULL;
+ struct pmu *pmu;
int idx;
int ret;
local64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_FORMAT_GROUP on inherited events
+ * We currently do not support PERF_SAMPLE_READ on inherited events.
+ * See perf_output_read().
*/
- if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+ if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
goto err_ns;
if (!has_branch_stack(event))
}
pmu = perf_init_event(event);
- if (!pmu)
- goto err_ns;
- else if (IS_ERR(pmu)) {
+ if (IS_ERR(pmu)) {
err = PTR_ERR(pmu);
goto err_ns;
}
event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
sizeof(unsigned long),
GFP_KERNEL);
- if (!event->addr_filters_offs)
+ if (!event->addr_filters_offs) {
+ err = -ENOMEM;
goto err_per_task;
+ }
/* force hw sync on the address filters */
event->addr_filters_gen = 1;
goto err_task;
}
- get_online_cpus();
-
if (task) {
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (err)
- goto err_cpus;
+ goto err_task;
/*
* Reuse ptrace permission checks for now.
goto err_locked;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_locked;
+ }
+ }
+
+
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
put_task_struct(task);
}
- put_online_cpus();
-
mutex_lock(¤t->perf_event_mutex);
list_add_tail(&event->owner_entry, ¤t->perf_event_list);
mutex_unlock(¤t->perf_event_mutex);
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
- err_cpus:
- put_online_cpus();
err_task:
if (task)
put_task_struct(task);
goto err_unlock;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+ }
+
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
struct swevent_htable *swhash;
int cpu;
+ zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
}
}
- int perf_event_init_cpu(unsigned int cpu)
+ void perf_swevent_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
- return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void perf_event_exit_cpu_context(int cpu)
{
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
- int idx;
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+ mutex_lock(&pmus_lock);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+ cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
- srcu_read_unlock(&pmus_srcu, idx);
+ cpumask_clear_cpu(cpu, perf_online_mask);
+ mutex_unlock(&pmus_lock);
}
#else
#endif
+ int perf_event_init_cpu(unsigned int cpu)
+ {
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+
+ perf_swevent_init_cpu(cpu);
+
+ mutex_lock(&pmus_lock);
+ cpumask_set_cpu(cpu, perf_online_mask);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+
+ mutex_lock(&ctx->mutex);
+ cpuctx->online = 1;
+ mutex_unlock(&ctx->mutex);
+ }
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+ }
+
int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
return module_alloc(PAGE_SIZE);
}
-static void free_insn_page(void *page)
+void __weak free_insn_page(void *page)
{
module_memfree(page);
}
*/
static void do_optimize_kprobes(void)
{
- /* Optimization never be done when disarmed */
- if (kprobes_all_disarmed || !kprobes_allow_optimization ||
- list_empty(&optimizing_list))
- return;
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
* This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus
* has been changed)
- * To avoid this deadlock, we need to call get_online_cpus()
+ * To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking.
*/
- get_online_cpus();
+ lockdep_assert_cpus_held();
+
+ /* Optimization never be done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/*
{
struct optimized_kprobe *op, *tmp;
+ /* See comment in do_optimize_kprobes() */
+ lockdep_assert_cpus_held();
+
/* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list))
return;
- /* Ditto to do_optimize_kprobes */
- get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
list_del_init(&op->list);
}
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/* Reclaim all kprobes on the free_list */
static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
+ cpus_read_lock();
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
- get_online_cpus();
+ lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
- put_online_cpus();
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
return;
/* For preparing optimization, jump_label_text_reserved() is called */
+ cpus_read_lock();
jump_label_lock();
mutex_lock(&text_mutex);
out:
mutex_unlock(&text_mutex);
jump_label_unlock();
+ cpus_read_unlock();
}
#ifdef CONFIG_SYSCTL
if (kprobes_allow_optimization)
goto out;
+ cpus_read_lock();
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
+ cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
return;
}
+ cpus_read_lock();
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
unoptimize_kprobe(p, false);
}
}
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */
arm_kprobe_ftrace(kp);
return;
}
- /*
- * Here, since __arm_kprobe() doesn't use stop_machine(),
- * this doesn't cause deadlock on text_mutex. So, we don't
- * need get_online_cpus().
- */
+ cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/* Disarm a kprobe with text_mutex */
disarm_kprobe_ftrace(kp);
return;
}
- /* Ditto */
+
+ cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/*
int ret = 0;
struct kprobe *ap = orig_p;
+ cpus_read_lock();
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
- /*
- * Get online CPUs to avoid text_mutex deadlock.with stop machine,
- * which is invoked by unoptimize_kprobe() in add_new_kprobe()
- */
- get_online_cpus();
mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) {
out:
mutex_unlock(&text_mutex);
- put_online_cpus();
jump_label_unlock();
+ cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
goto out;
}
- mutex_lock(&text_mutex); /* Avoiding text modification */
+ cpus_read_lock();
+ /* Prevent text modification */
+ mutex_lock(&text_mutex);
ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
if (ret)
goto out;
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
-
out:
mutex_unlock(&kprobe_mutex);