Merge tag 'for-linus-5.11-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <[email protected]>

Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)

committer Linus Torvalds <[email protected]>

Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)
author Linus Torvalds <[email protected]>
Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)
committer Linus Torvalds <[email protected]>
Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)
diff --combined Documentation/admin-guide/kernel-parameters.txt

index 9e3cdb271d06cfc198e80824ecfd83b305ae326c,035d27b6272cdc0236b958121fb8fefc4576eaa1..a10b545c2070a54455c3e7181cc43135dc68f611
--- 1/Documentation/admin-guide/kernel-parameters.txt
--- 2/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@@ -1385,7 -1385,7 +1385,7 @@@
   
         ftrace_filter=[function-list]
                         [FTRACE] Limit the functions traced by the function
- -                      tracer at boot up. function-list is a comma separated
+ +                      tracer at boot up. function-list is a comma-separated
                         list of functions. This list can be changed at run
                         time by the set_ftrace_filter file in the debugfs
                         tracing directory.
@@@ -1399,13 -1399,13 +1399,13 @@@
         ftrace_graph_filter=[function-list]
                         [FTRACE] Limit the top level callers functions traced
                         by the function graph tracer at boot up.
- -                      function-list is a comma separated list of functions
+ +                      function-list is a comma-separated list of functions
                         that can be changed at run time by the
                         set_graph_function file in the debugfs tracing directory.
   
         ftrace_graph_notrace=[function-list]
                         [FTRACE] Do not trace from the functions specified in
- -                      function-list.  This list is a comma separated list of
+ +                      function-list.  This list is a comma-separated list of
                         functions that can be changed at run time by the
                         set_graph_notrace file in the debugfs tracing directory.
   
@@@ -1883,6 -1883,11 +1883,6 @@@
                         Note that using this option lowers the security
                         provided by tboot because it makes the system
                         vulnerable to DMA attacks.
- -              nobounce [Default off]
- -                      Disable bounce buffer for untrusted devices such as
- -                      the Thunderbolt devices. This will treat the untrusted
- -                      devices as the trusted ones, hence might expose security
- -                      risks of DMA attacks.
   
         intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                         0       disables intel_idle and fall back on acpi_idle.
@@@ -2254,16 -2259,6 +2254,16 @@@
                         for all guests.
                         Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
   
+ +      kvm-arm.mode=
+ +                      [KVM,ARM] Select one of KVM/arm64's modes of operation.
+ +
+ +                      protected: nVHE-based mode with support for guests whose
+ +                                 state is kept private from the host.
+ +                                 Not valid if the kernel is running in EL2.
+ +
+ +                      Defaults to VHE/nVHE based on hardware support and
+ +                      the value of CONFIG_ARM64_VHE.
+ +
         kvm-arm.vgic_v3_group0_trap=
                         [KVM,ARM] Trap guest accesses to GICv3 group-0
                         system registers
@@@ -2421,7 -2416,7 +2421,7 @@@
                         when set.
                         Format: <int>
   
- -      libata.force=   [LIBATA] Force configurations.  The format is comma
+ +      libata.force=   [LIBATA] Force configurations.  The format is a comma-
                         separated list of "[ID:]VAL" where ID is
                         PORT[.DEVICE].  PORT and DEVICE are decimal numbers
                         matching port, link or device.  Basically, it matches
@@@ -2714,7 -2709,7 +2714,7 @@@
                         option description.
   
         memmap=nn[KMG]@ss[KMG]
- -                      [KNL] Force usage of a specific region of memory.
+ +                      [KNL, X86, MIPS, XTENSA] Force usage of a specific region of memory.
                         Region of memory to be used is from ss to ss+nn.
                         If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
                         which limits max address to nn[KMG].
@@@ -2958,7 -2953,7 +2958,7 @@@
         mtdset=         [ARM]
                         ARM/S3C2412 JIVE boot control
   
- -                      See arch/arm/mach-s3c2412/mach-jive.c
+ +                      See arch/arm/mach-s3c/mach-jive.c
   
         mtouchusb.raw_coordinates=
                         [HW] Make the MicroTouch USB driver use raw coordinates
@@@ -3380,8 -3375,6 +3380,8 @@@
   
         nosep           [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
   
+ +      nosgx           [X86-64,SGX] Disables Intel SGX kernel support.
+ +
         nosmp           [SMP] Tells an SMP kernel to act as a UP kernel,
                         and disable the IO APIC.  legacy for "maxcpus=0".
   
@@@ -5145,7 -5138,7 +5145,7 @@@
   
         stacktrace_filter=[function-list]
                         [FTRACE] Limit the functions that the stack tracer
- -                      will trace at boot up. function-list is a comma separated
+ +                      will trace at boot up. function-list is a comma-separated
                         list of functions. This list can be changed at run
                         time by the stack_trace_filter file in the debugfs
                         tracing directory. Note, this enables stack tracing
@@@ -5348,7 -5341,7 +5348,7 @@@
         trace_event=[event-list]
                         [FTRACE] Set and start specified trace events in order
                         to facilitate early boot debugging. The event-list is a
- -                      comma separated list of trace events to enable. See
+ +                      comma-separated list of trace events to enable. See
                         also Documentation/trace/events.rst
   
         trace_options=[option-list]
@@@ -5670,7 -5663,6 +5670,7 @@@
                                         device);
                                 j = NO_REPORT_LUNS (don't use report luns
                                         command, uas only);
+ +                              k = NO_SAME (do not use WRITE_SAME, uas only);
                                 l = NOT_LOCKABLE (don't try to lock and
                                         unlock ejectable media, not on uas);
                                 m = MAX_SECTORS_64 (don't transfer more
@@@ -5972,6 -5964,10 +5972,10 @@@
                         This option is obsoleted by the "nopv" option, which
                         has equivalent effect for XEN platform.
   
+       xen_no_vector_callback
+                       [KNL,X86,XEN] Disable the vector callback for Xen
+                       event channel interrupts.
+ 
         xen_scrub_pages=        [XEN]
                         Boolean option to control scrubbing pages before giving them back
                         to Xen, for use by other domains. Can be also changed at runtime
diff --combined drivers/xen/events/events_base.c

index a8030332a19169b4805c8537b2a0df359a491833,bbebe248b72647d631ab9838ae6f43ad3363ba2a..e850f79351cbb4690fe757293935daeab179c5bf
--- 1/drivers/xen/events/events_base.c
--- 2/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@@ -95,8 -95,7 +95,8 @@@ struct irq_info 
         struct list_head list;
         struct list_head eoi_list;
         short refcnt;
- -      short spurious_cnt;
+ +      u8 spurious_cnt;
+ +      u8 is_accounted;
         enum xen_irq_type type; /* type */
         unsigned irq;
         evtchn_port_t evtchn;   /* event channel */
@@@ -162,9 -161,6 +162,9 @@@ static DEFINE_PER_CPU(int [NR_VIRQS], v
   /* IRQ <-> IPI mapping */
   static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
   
+ +/* Event channel distribution data */
+ +static atomic_t channels_on_cpu[NR_CPUS];
+ +
   static int **evtchn_to_irq;
   #ifdef CONFIG_X86
   static unsigned long *pirq_eoi_map;
@@@ -261,32 -257,6 +261,32 @@@ static void set_info_for_irq(unsigned i
                 irq_set_chip_data(irq, info);
   }
   
+ +/* Per CPU channel accounting */
+ +static void channels_on_cpu_dec(struct irq_info *info)
+ +{
+ +      if (!info->is_accounted)
+ +              return;
+ +
+ +      info->is_accounted = 0;
+ +
+ +      if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ +              return;
+ +
+ +      WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
+ +}
+ +
+ +static void channels_on_cpu_inc(struct irq_info *info)
+ +{
+ +      if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ +              return;
+ +
+ +      if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+ +                                          INT_MAX)))
+ +              return;
+ +
+ +      info->is_accounted = 1;
+ +}
+ +
   /* Constructors for packed IRQ information. */
   static int xen_irq_info_common_setup(struct irq_info *info,
                                      unsigned irq,
@@@ -369,7 -339,6 +369,7 @@@ static void xen_irq_info_cleanup(struc
   {
         set_evtchn_to_irq(info->evtchn, -1);
         info->evtchn = 0;
+ +      channels_on_cpu_dec(info);
   }
   
   /*
@@@ -464,25 -433,18 +464,25 @@@ static bool pirq_needs_eoi_flag(unsigne
         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
   }
   
- -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+ +static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ +                             bool force_affinity)
   {
         int irq = get_evtchn_to_irq(evtchn);
         struct irq_info *info = info_for_irq(irq);
   
         BUG_ON(irq == -1);
- -#ifdef CONFIG_SMP
- -      cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
- -#endif
+ +
+ +      if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ +              cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ +              cpumask_copy(irq_get_effective_affinity_mask(irq),
+ +                           cpumask_of(cpu));
+ +      }
+ +
         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
   
+ +      channels_on_cpu_dec(info);
         info->cpu = cpu;
+ +      channels_on_cpu_inc(info);
   }
   
   /**
@@@ -561,10 -523,8 +561,10 @@@ static void xen_irq_lateeoi_locked(stru
                 return;
   
         if (spurious) {
- -              if ((1 << info->spurious_cnt) < (HZ << 2))
- -                      info->spurious_cnt++;
+ +              if ((1 << info->spurious_cnt) < (HZ << 2)) {
+ +                      if (info->spurious_cnt != 0xFF)
+ +                              info->spurious_cnt++;
+ +              }
                 if (info->spurious_cnt > 1) {
                         delay = 1 << (info->spurious_cnt - 2);
                         if (delay > HZ)
@@@ -655,6 -615,11 +655,6 @@@ static void xen_irq_init(unsigned irq
   {
         struct irq_info *info;
   
- -#ifdef CONFIG_SMP
- -      /* By default all event channels notify CPU#0. */
- -      cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
- -#endif
- -
         info = kzalloc(sizeof(*info), GFP_KERNEL);
         if (info == NULL)
                 panic("Unable to allocate metadata for IRQ%d\n", irq);
@@@ -663,11 -628,6 +663,11 @@@
         info->refcnt = -1;
   
         set_info_for_irq(irq, info);
+ +      /*
+ +       * Interrupt affinity setting can be immediate. No point
+ +       * in delaying it until an interrupt is handled.
+ +       */
+ +      irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
   
         INIT_LIST_HEAD(&info->eoi_list);
         list_add_tail(&info->list, &xen_irq_list_head);
@@@ -779,7 -739,18 +779,7 @@@ static void eoi_pirq(struct irq_data *d
         if (!VALID_EVTCHN(evtchn))
                 return;
   
- -      if (unlikely(irqd_is_setaffinity_pending(data)) &&
- -          likely(!irqd_irq_disabled(data))) {
- -              int masked = test_and_set_mask(evtchn);
- -
- -              clear_evtchn(evtchn);
- -
- -              irq_move_masked_irq(data);
- -
- -              if (!masked)
- -                      unmask_evtchn(evtchn);
- -      } else
- -              clear_evtchn(evtchn);
+ +      clear_evtchn(evtchn);
   
         if (pirq_needs_eoi(data->irq)) {
                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@@ -823,7 -794,7 +823,7 @@@ static unsigned int __startup_pirq(unsi
                 goto err;
   
         info->evtchn = evtchn;
- -      bind_evtchn_to_cpu(evtchn, 0);
+ +      bind_evtchn_to_cpu(evtchn, 0, false);
   
         rc = xen_evtchn_port_setup(evtchn);
         if (rc)
@@@ -1142,14 -1113,8 +1142,14 @@@ static int bind_evtchn_to_irq_chip(evtc
                         irq = ret;
                         goto out;
                 }
- -              /* New interdomain events are bound to VCPU 0. */
- -              bind_evtchn_to_cpu(evtchn, 0);
+ +              /*
+ +               * New interdomain events are initially bound to vCPU0. This
+ +               * is required to set up the event channel in the first
+ +               * place and also important for UP guests because the
+ +               * affinity setting is not invoked on them so nothing would
+ +               * bind the channel.
+ +               */
+ +              bind_evtchn_to_cpu(evtchn, 0, false);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@@ -1167,6 -1132,12 +1167,6 @@@ int bind_evtchn_to_irq(evtchn_port_t ev
   }
   EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
   
- -int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
- -{
- -      return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
- -}
- -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
- -
   static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
   {
         struct evtchn_bind_ipi bind_ipi;
@@@ -1197,11 -1168,7 +1197,11 @@@
                         irq = ret;
                         goto out;
                 }
- -              bind_evtchn_to_cpu(evtchn, cpu);
+ +              /*
+ +               * Force the affinity mask to the target CPU so proc shows
+ +               * the correct target.
+ +               */
+ +              bind_evtchn_to_cpu(evtchn, cpu, true);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_IPI);
@@@ -1314,11 -1281,7 +1314,11 @@@ int bind_virq_to_irq(unsigned int virq
                         goto out;
                 }
   
- -              bind_evtchn_to_cpu(evtchn, cpu);
+ +              /*
+ +               * Force the affinity mask for percpu interrupts so proc
+ +               * shows the correct target.
+ +               */
+ +              bind_evtchn_to_cpu(evtchn, cpu, percpu);
         } else {
                 struct irq_info *info = info_for_irq(irq);
                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@@ -1683,7 -1646,9 +1683,7 @@@ void rebind_evtchn_irq(evtchn_port_t ev
   
         mutex_unlock(&irq_mapping_update_lock);
   
- -        bind_evtchn_to_cpu(evtchn, info->cpu);
- -      /* This will be deferred until interrupt is processed */
- -      irq_set_affinity(irq, cpumask_of(info->cpu));
+ +      bind_evtchn_to_cpu(evtchn, info->cpu, false);
   
         /* Unmask the event channel. */
         enable_irq(irq);
@@@ -1717,7 -1682,7 +1717,7 @@@ static int xen_rebind_evtchn_to_cpu(evt
          * it, but don't do the xenlinux-level rebind in that case.
          */
         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- -              bind_evtchn_to_cpu(evtchn, tcpu);
+ +              bind_evtchn_to_cpu(evtchn, tcpu, false);
   
         if (!masked)
                 unmask_evtchn(evtchn);
@@@ -1725,47 -1690,27 +1725,47 @@@
         return 0;
   }
   
+ +/*
+ + * Find the CPU within @dest mask which has the least number of channels
+ + * assigned. This is not precise as the per cpu counts can be modified
+ + * concurrently.
+ + */
+ +static unsigned int select_target_cpu(const struct cpumask *dest)
+ +{
+ +      unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+ +
+ +      for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ +              unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+ +
+ +              if (curch < minch) {
+ +                      minch = curch;
+ +                      best_cpu = cpu;
+ +              }
+ +      }
+ +
+ +      /*
+ +       * Catch the unlikely case that dest contains no online CPUs. Retry
+ +       * with cpu_online_mask, which cannot recurse any further.
+ +       */
+ +      if (best_cpu == UINT_MAX)
+ +              return select_target_cpu(cpu_online_mask);
+ +
+ +      return best_cpu;
+ +}
+ +
   static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
                             bool force)
   {
- -      unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
- -      int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+ +      unsigned int tcpu = select_target_cpu(dest);
+ +      int ret;
   
+ +      ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
         if (!ret)
                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
   
         return ret;
   }
   
- -/* To be called with desc->lock held. */
- -int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
- -{
- -      struct irq_data *d = irq_desc_get_irq_data(desc);
- -
- -      return set_affinity_irq(d, cpumask_of(tcpu), false);
- -}
- -EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
- -
   static void enable_dynirq(struct irq_data *data)
   {
         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
@@@ -1789,7 -1734,18 +1789,7 @@@ static void ack_dynirq(struct irq_data 
         if (!VALID_EVTCHN(evtchn))
                 return;
   
- -      if (unlikely(irqd_is_setaffinity_pending(data)) &&
- -          likely(!irqd_irq_disabled(data))) {
- -              int masked = test_and_set_mask(evtchn);
- -
- -              clear_evtchn(evtchn);
- -
- -              irq_move_masked_irq(data);
- -
- -              if (!masked)
- -                      unmask_evtchn(evtchn);
- -      } else
- -              clear_evtchn(evtchn);
+ +      clear_evtchn(evtchn);
   }
   
   static void mask_ack_dynirq(struct irq_data *data)
@@@ -1874,8 -1830,7 +1874,8 @@@ static void restore_cpu_virqs(unsigned 
   
                 /* Record the new mapping. */
                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- -              bind_evtchn_to_cpu(evtchn, cpu);
+ +              /* The affinity mask is still valid */
+ +              bind_evtchn_to_cpu(evtchn, cpu, false);
         }
   }
   
@@@ -1900,8 -1855,7 +1900,8 @@@ static void restore_cpu_ipis(unsigned i
   
                 /* Record the new mapping. */
                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- -              bind_evtchn_to_cpu(evtchn, cpu);
+ +              /* The affinity mask is still valid */
+ +              bind_evtchn_to_cpu(evtchn, cpu, false);
         }
   }
   
@@@ -1984,12 -1938,8 +1984,12 @@@ void xen_irq_resume(void
         xen_evtchn_resume();
   
         /* No IRQ <-> event-channel mappings. */
- -      list_for_each_entry(info, &xen_irq_list_head, list)
- -              info->evtchn = 0; /* zap event-channel binding */
+ +      list_for_each_entry(info, &xen_irq_list_head, list) {
+ +              /* Zap event-channel binding */
+ +              info->evtchn = 0;
+ +              /* Adjust accounting */
+ +              channels_on_cpu_dec(info);
+ +      }
   
         clear_evtchn_to_irq_all();
   
@@@ -2060,16 -2010,6 +2060,6 @@@ static struct irq_chip xen_percpu_chip 
         .irq_ack                = ack_dynirq,
   };
   
- int xen_set_callback_via(uint64_t via)
- {
-       struct xen_hvm_param a;
-       a.domid = DOMID_SELF;
-       a.index = HVM_PARAM_CALLBACK_IRQ;
-       a.value = via;
-       return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
- }
- EXPORT_SYMBOL_GPL(xen_set_callback_via);
- 
   #ifdef CONFIG_XEN_PVHVM
   /* Vector callbacks are better than PCI interrupts to receive event
    * channel notifications because we can receive vector callbacks on any
author	Linus Torvalds <[email protected]>
	Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)
committer	Linus Torvalds <[email protected]>
	Fri, 15 Jan 2021 18:52:00 +0000 (10:52 -0800)
		1	2
Documentation/admin-guide/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/xen/events/events_base.c	patch \|	diff1 \|	diff2 \|	blob \| history