ftrace_filter=[function-list]
[FTRACE] Limit the functions traced by the function
- tracer at boot up. function-list is a comma separated
+ tracer at boot up. function-list is a comma-separated
list of functions. This list can be changed at run
time by the set_ftrace_filter file in the debugfs
tracing directory.
ftrace_graph_filter=[function-list]
[FTRACE] Limit the top level callers functions traced
by the function graph tracer at boot up.
- function-list is a comma separated list of functions
+ function-list is a comma-separated list of functions
that can be changed at run time by the
set_graph_function file in the debugfs tracing directory.
ftrace_graph_notrace=[function-list]
[FTRACE] Do not trace from the functions specified in
- function-list. This list is a comma separated list of
+ function-list. This list is a comma-separated list of
functions that can be changed at run time by the
set_graph_notrace file in the debugfs tracing directory.
Note that using this option lowers the security
provided by tboot because it makes the system
vulnerable to DMA attacks.
- nobounce [Default off]
- Disable bounce buffer for untrusted devices such as
- the Thunderbolt devices. This will treat the untrusted
- devices as the trusted ones, hence might expose security
- risks of DMA attacks.
intel_idle.max_cstate= [KNL,HW,ACPI,X86]
0 disables intel_idle and fall back on acpi_idle.
for all guests.
Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
+ kvm-arm.mode=
+ [KVM,ARM] Select one of KVM/arm64's modes of operation.
+
+ protected: nVHE-based mode with support for guests whose
+ state is kept private from the host.
+ Not valid if the kernel is running in EL2.
+
+ Defaults to VHE/nVHE based on hardware support and
+ the value of CONFIG_ARM64_VHE.
+
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0
system registers
when set.
Format: <int>
- libata.force= [LIBATA] Force configurations. The format is comma
+ libata.force= [LIBATA] Force configurations. The format is comma-
separated list of "[ID:]VAL" where ID is
PORT[.DEVICE]. PORT and DEVICE are decimal numbers
matching port, link or device. Basically, it matches
option description.
memmap=nn[KMG]@ss[KMG]
- [KNL] Force usage of a specific region of memory.
+ [KNL, X86, MIPS, XTENSA] Force usage of a specific region of memory.
Region of memory to be used is from ss to ss+nn.
If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
which limits max address to nn[KMG].
mtdset= [ARM]
ARM/S3C2412 JIVE boot control
- See arch/arm/mach-s3c2412/mach-jive.c
+ See arch/arm/mach-s3c/mach-jive.c
mtouchusb.raw_coordinates=
[HW] Make the MicroTouch USB driver use raw coordinates
nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
+ nosgx [X86-64,SGX] Disables Intel SGX kernel support.
+
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
- will trace at boot up. function-list is a comma separated
+ will trace at boot up. function-list is a comma-separated
list of functions. This list can be changed at run
time by the stack_trace_filter file in the debugfs
tracing directory. Note, this enables stack tracing
trace_event=[event-list]
[FTRACE] Set and start specified trace events in order
to facilitate early boot debugging. The event-list is a
- comma separated list of trace events to enable. See
+ comma-separated list of trace events to enable. See
also Documentation/trace/events.rst
trace_options=[option-list]
device);
j = NO_REPORT_LUNS (don't use report luns
command, uas only);
+ k = NO_SAME (do not use WRITE_SAME, uas only)
l = NOT_LOCKABLE (don't try to lock and
unlock ejectable media, not on uas);
m = MAX_SECTORS_64 (don't transfer more
This option is obsoleted by the "nopv" option, which
has equivalent effect for XEN platform.
+ xen_no_vector_callback
+ [KNL,X86,XEN] Disable the vector callback for Xen
+ event channel interrupts.
+
xen_scrub_pages= [XEN]
Boolean option to control scrubbing pages before giving them back
to Xen, for use by other domains. Can be also changed at runtime
struct list_head list;
struct list_head eoi_list;
short refcnt;
- short spurious_cnt;
+ u8 spurious_cnt;
+ u8 is_accounted;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+/* Event channel distribution data */
+static atomic_t channels_on_cpu[NR_CPUS];
+
static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
irq_set_chip_data(irq, info);
}
+/* Per CPU channel accounting */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+ if (!info->is_accounted)
+ return;
+
+ info->is_accounted = 0;
+
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
+}
+
+static void channels_on_cpu_inc(struct irq_info *info)
+{
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+ INT_MAX)))
+ return;
+
+ info->is_accounted = 1;
+}
+
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
{
set_evtchn_to_irq(info->evtchn, -1);
info->evtchn = 0;
+ channels_on_cpu_dec(info);
}
/*
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ cpumask_copy(irq_get_effective_affinity_mask(irq),
+ cpumask_of(cpu));
+ }
+
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+ channels_on_cpu_dec(info);
info->cpu = cpu;
+ channels_on_cpu_inc(info);
}
/**
return;
if (spurious) {
- if ((1 << info->spurious_cnt) < (HZ << 2))
- info->spurious_cnt++;
+ if ((1 << info->spurious_cnt) < (HZ << 2)) {
+ if (info->spurious_cnt != 0xFF)
+ info->spurious_cnt++;
+ }
if (info->spurious_cnt > 1) {
delay = 1 << (info->spurious_cnt - 2);
if (delay > HZ)
{
struct irq_info *info;
-#ifdef CONFIG_SMP
- /* By default all event channels notify CPU#0. */
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
-
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL)
panic("Unable to allocate metadata for IRQ%d\n", irq);
info->refcnt = -1;
set_info_for_irq(irq, info);
+ /*
+ * Interrupt affinity setting can be immediate. No point
+ * in delaying it until an interrupt is handled.
+ */
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
if (!VALID_EVTCHN(evtchn))
return;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
-
- clear_evtchn(evtchn);
-
- irq_move_masked_irq(data);
-
- if (!masked)
- unmask_evtchn(evtchn);
- } else
- clear_evtchn(evtchn);
+ clear_evtchn(evtchn);
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
goto err;
info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0, false);
rc = xen_evtchn_port_setup(evtchn);
if (rc)
irq = ret;
goto out;
}
- /* New interdomain events are bound to VCPU 0. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /*
+ * New interdomain events are initially bound to vCPU0 This
+ * is required to setup the event channel in the first
+ * place and also important for UP guests because the
+ * affinity setting is not invoked on them so nothing would
+ * bind the channel.
+ */
+ bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
-{
- return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
-
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
irq = ret;
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
mutex_unlock(&irq_mapping_update_lock);
- bind_evtchn_to_cpu(evtchn, info->cpu);
- /* This will be deferred until interrupt is processed */
- irq_set_affinity(irq, cpumask_of(info->cpu));
+ bind_evtchn_to_cpu(evtchn, info->cpu, false);
/* Unmask the event channel. */
enable_irq(irq);
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu, false);
if (!masked)
unmask_evtchn(evtchn);
return 0;
}
+/*
+ * Find the CPU within @dest mask which has the least number of channels
+ * assigned. This is not precise as the per cpu counts can be modified
+ * concurrently.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+ unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+ for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+ if (curch < minch) {
+ minch = curch;
+ best_cpu = cpu;
+ }
+ }
+
+ /*
+ * Catch the unlikely case that dest contains no online CPUs. Can't
+ * recurse.
+ */
+ if (best_cpu == UINT_MAX)
+ return select_target_cpu(cpu_online_mask);
+
+ return best_cpu;
+}
+
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
- int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+ unsigned int tcpu = select_target_cpu(dest);
+ int ret;
+ ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
if (!ret)
irq_data_update_effective_affinity(data, cpumask_of(tcpu));
return ret;
}
-/* To be called with desc->lock held. */
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
-{
- struct irq_data *d = irq_desc_get_irq_data(desc);
-
- return set_affinity_irq(d, cpumask_of(tcpu), false);
-}
-EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
-
static void enable_dynirq(struct irq_data *data)
{
evtchn_port_t evtchn = evtchn_from_irq(data->irq);
if (!VALID_EVTCHN(evtchn))
return;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
-
- clear_evtchn(evtchn);
-
- irq_move_masked_irq(data);
-
- if (!masked)
- unmask_evtchn(evtchn);
- } else
- clear_evtchn(evtchn);
+ clear_evtchn(evtchn);
}
static void mask_ack_dynirq(struct irq_data *data)
/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
- list_for_each_entry(info, &xen_irq_list_head, list)
- info->evtchn = 0; /* zap event-channel binding */
+ list_for_each_entry(info, &xen_irq_list_head, list) {
+ /* Zap event-channel binding */
+ info->evtchn = 0;
+ /* Adjust accounting */
+ channels_on_cpu_dec(info);
+ }
clear_evtchn_to_irq_all();
.irq_ack = ack_dynirq,
};
- int xen_set_callback_via(uint64_t via)
- {
- struct xen_hvm_param a;
- a.domid = DOMID_SELF;
- a.index = HVM_PARAM_CALLBACK_IRQ;
- a.value = via;
- return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
- }
- EXPORT_SYMBOL_GPL(xen_set_callback_via);
-
#ifdef CONFIG_XEN_PVHVM
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any