Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

author Linus Torvalds <[email protected]>
Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
committer Linus Torvalds <[email protected]>
Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
diff --combined arch/powerpc/kernel/perf_callchain.c

index 936f04dbfc6f00bd1803a643753520b34e1ae8b1,fe59c44f9b5be5577c8fa69d010074616f7889b5..a3c11cac3d7154d1381d77a83ec513e5c6601c11
--- 1/arch/powerpc/kernel/perf_callchain.c
--- 2/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@@ -119,6 -119,13 +119,6 @@@ static void perf_callchain_kernel(struc
   }
   
   #ifdef CONFIG_PPC64
- -
- -#ifdef CONFIG_HUGETLB_PAGE
- -#define is_huge_psize(pagesize)       (HPAGE_SHIFT && mmu_huge_psizes[pagesize])
- -#else
- -#define is_huge_psize(pagesize)       0
- -#endif
- -
   /*
    * On 64-bit we don't want to invoke hash_page on user addresses from
    * interrupt context, so if the access faults, we read the page tables
@@@ -128,7 -135,7 +128,7 @@@ static int read_user_stack_slow(void __
   {
         pgd_t *pgdir;
         pte_t *ptep, pte;
- -      int pagesize;
+ +      unsigned shift;
         unsigned long addr = (unsigned long) ptr;
         unsigned long offset;
         unsigned long pfn;
@@@ -138,14 -145,17 +138,14 @@@
         if (!pgdir)
                 return -EFAULT;
   
- -      pagesize = get_slice_psize(current->mm, addr);
+ +      ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+ +      if (!shift)
+ +              shift = PAGE_SHIFT;
   
         /* align address to page boundary */
- -      offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
+ +      offset = addr & ((1UL << shift) - 1);
         addr -= offset;
   
- -      if (is_huge_psize(pagesize))
- -              ptep = huge_pte_offset(current->mm, addr);
- -      else
- -              ptep = find_linux_pte(pgdir, addr);
- -
         if (ptep == NULL)
                 return -EFAULT;
         pte = *ptep;
@@@ -487,11 -497,11 +487,11 @@@ static void perf_callchain_user_32(stru
    * Since we can't get PMU interrupts inside a PMU interrupt handler,
    * we don't need separate irq and nmi entries here.
    */
- static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+ static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
   
   struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
   {
-       struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+       struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
   
         entry->nr = 0;
   
diff --combined arch/powerpc/kernel/setup-common.c

index 845c72ab7357884c580a1c6f3f3217ed4987ee75,aa5aeb947bc5754a9431dd52eb30d5d7e0390985..03dd6a248198c3247af741db5f154452b2c84f2c
--- 1/arch/powerpc/kernel/setup-common.c
--- 2/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@@ -157,7 -157,7 +157,7 @@@ extern u32 cpu_temp_both(unsigned long 
   #endif /* CONFIG_TAU */
   
   #ifdef CONFIG_SMP
- DEFINE_PER_CPU(unsigned int, pvr);
+ DEFINE_PER_CPU(unsigned int, cpu_pvr);
   #endif
   
   static int show_cpuinfo(struct seq_file *m, void *v)
@@@ -209,7 -209,7 +209,7 @@@
         }
   
   #ifdef CONFIG_SMP
-       pvr = per_cpu(pvr, cpu_id);
+       pvr = per_cpu(cpu_pvr, cpu_id);
   #else
         pvr = mfspr(SPRN_PVR);
   #endif
@@@ -660,7 -660,6 +660,7 @@@ late_initcall(check_cache_coherency)
   
   #ifdef CONFIG_DEBUG_FS
   struct dentry *powerpc_debugfs_root;
+ +EXPORT_SYMBOL(powerpc_debugfs_root);
   
   static int powerpc_debugfs_init(void)
   {
diff --combined arch/powerpc/kernel/smp.c

index 97196eefef3edccba66bfd7398b9d75962e1d5c4,2ebb48410976c9801917f9b40278163ed5ccc445..a521fb8a40ee2fcb206397cf490aab9550cf9c96
--- 1/arch/powerpc/kernel/smp.c
--- 2/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@@ -218,9 -218,6 +218,9 @@@ void crash_send_ipi(void (*crash_ipi_ca
   
   static void stop_this_cpu(void *dummy)
   {
+ +      /* Remove this CPU */
+ +      set_cpu_online(smp_processor_id(), false);
+ +
         local_irq_disable();
         while (1)
                 ;
@@@ -235,7 -232,7 +235,7 @@@ struct thread_info *current_set[NR_CPUS
   
   static void __devinit smp_store_cpu_info(int id)
   {
-       per_cpu(pvr, id) = mfspr(SPRN_PVR);
+       per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
   }
   
   static void __init smp_create_idle(unsigned int cpu)
diff --combined arch/powerpc/platforms/cell/interrupt.c

index f9dbf76a763f74fea41d2af522a23123f6880250,54bad901e4c9870c8c944d3bd3f981b5957ae60f..7267effc8078b53265e43898a7551f6adf2523d8
--- 1/arch/powerpc/platforms/cell/interrupt.c
--- 2/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@@ -54,7 -54,7 +54,7 @@@ struct iic 
         struct device_node *node;
   };
   
- static DEFINE_PER_CPU(struct iic, iic);
+ static DEFINE_PER_CPU(struct iic, cpu_iic);
   #define IIC_NODE_COUNT        2
   static struct irq_host *iic_host;
   
@@@ -82,13 -82,13 +82,13 @@@ static void iic_unmask(unsigned int irq
   
   static void iic_eoi(unsigned int irq)
   {
-       struct iic *iic = &__get_cpu_var(iic);
+       struct iic *iic = &__get_cpu_var(cpu_iic);
         out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
         BUG_ON(iic->eoi_ptr < 0);
   }
   
   static struct irq_chip iic_chip = {
- -      .typename = " CELL-IIC ",
+ +      .name = " CELL-IIC ",
         .mask = iic_mask,
         .unmask = iic_unmask,
         .eoi = iic_eoi,
@@@ -133,7 -133,7 +133,7 @@@ static void iic_ioexc_cascade(unsigned 
   
   
   static struct irq_chip iic_ioexc_chip = {
- -      .typename = " CELL-IOEX",
+ +      .name = " CELL-IOEX",
         .mask = iic_mask,
         .unmask = iic_unmask,
         .eoi = iic_ioexc_eoi,
@@@ -146,7 -146,7 +146,7 @@@ static unsigned int iic_get_irq(void
         struct iic *iic;
         unsigned int virq;
   
-       iic = &__get_cpu_var(iic);
+       iic = &__get_cpu_var(cpu_iic);
         *(unsigned long *) &pending =
                 in_be64((u64 __iomem *) &iic->regs->pending_destr);
         if (!(pending.flags & CBE_IIC_IRQ_VALID))
@@@ -161,12 -161,12 +161,12 @@@
   
   void iic_setup_cpu(void)
   {
-       out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+       out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff);
   }
   
   u8 iic_get_target_id(int cpu)
   {
-       return per_cpu(iic, cpu).target_id;
+       return per_cpu(cpu_iic, cpu).target_id;
   }
   
   EXPORT_SYMBOL_GPL(iic_get_target_id);
@@@ -181,7 -181,7 +181,7 @@@ static inline int iic_ipi_to_irq(int ip
   
   void iic_cause_IPI(int cpu, int mesg)
   {
-       out_be64(&per_cpu(iic, cpu).regs->generate, (0xf - mesg) << 4);
+       out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - mesg) << 4);
   }
   
   struct irq_host *iic_get_irq_host(int node)
@@@ -297,7 -297,7 +297,7 @@@ static int iic_host_map(struct irq_hos
   }
   
   static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
- -                         u32 *intspec, unsigned int intsize,
+ +                         const u32 *intspec, unsigned int intsize,
                            irq_hw_number_t *out_hwirq, unsigned int *out_flags)
   
   {
@@@ -348,7 -348,7 +348,7 @@@ static void __init init_one_iic(unsigne
         /* XXX FIXME: should locate the linux CPU number from the HW cpu
          * number properly. We are lucky for now
          */
-       struct iic *iic = &per_cpu(iic, hw_cpu);
+       struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
   
         iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
         BUG_ON(iic->regs == NULL);
diff --combined arch/x86/kernel/apic/nmi.c

index 6389432a9dbf7f07a0dd08b4e67857c6ec899d6d,e631cc4416f7872826a79d84078181b8917ce08b..0159a69396cba449a424190459a02d83a3f417d8
--- 1/arch/x86/kernel/apic/nmi.c
--- 2/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@@ -39,8 -39,7 +39,8 @@@
   int unknown_nmi_panic;
   int nmi_watchdog_enabled;
   
- -static cpumask_t backtrace_mask __read_mostly;
+ +/* For reliability, we're prepared to waste bits here. */
+ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
   
   /* nmi_active:
    * >0: the lapic NMI watchdog is active, but can be disabled
@@@ -361,7 -360,7 +361,7 @@@ void stop_apic_nmi_watchdog(void *unuse
    */
   
   static DEFINE_PER_CPU(unsigned, last_irq_sum);
- static DEFINE_PER_CPU(local_t, alert_counter);
+ static DEFINE_PER_CPU(long, alert_counter);
   static DEFINE_PER_CPU(int, nmi_touch);
   
   void touch_nmi_watchdog(void)
@@@ -415,7 -414,7 +415,7 @@@ nmi_watchdog_tick(struct pt_regs *regs
         }
   
         /* We can be called before check_nmi_watchdog, hence NULL check. */
- -      if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+ +      if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                 static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
   
                 spin_lock(&lock);
@@@ -423,7 -422,7 +423,7 @@@
                 show_regs(regs);
                 dump_stack();
                 spin_unlock(&lock);
- -              cpumask_clear_cpu(cpu, &backtrace_mask);
+ +              cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
   
                 rc = 1;
         }
@@@ -438,8 -437,8 +438,8 @@@
                  * Ayiee, looks like this CPU is stuck ...
                  * wait a few IRQs (5 seconds) before doing the oops ...
                  */
-               local_inc(&__get_cpu_var(alert_counter));
-               if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+               __this_cpu_inc(per_cpu_var(alert_counter));
+               if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
                         /*
                          * die_nmi will return ONLY if NOTIFY_STOP happens..
                          */
@@@ -447,7 -446,7 +447,7 @@@
                                 regs, panic_on_timeout);
         } else {
                 __get_cpu_var(last_irq_sum) = sum;
-               local_set(&__get_cpu_var(alert_counter), 0);
+               __this_cpu_write(per_cpu_var(alert_counter), 0);
         }
   
         /* see if the nmi watchdog went off */
@@@ -559,14 -558,14 +559,14 @@@ void arch_trigger_all_cpu_backtrace(voi
   {
         int i;
   
- -      cpumask_copy(&backtrace_mask, cpu_online_mask);
+ +      cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
   
         printk(KERN_INFO "sending NMI to all CPUs:\n");
         apic->send_IPI_all(NMI_VECTOR);
   
         /* Wait for up to 10 seconds for all CPUs to do the backtrace */
         for (i = 0; i < 10 * 1000; i++) {
- -              if (cpumask_empty(&backtrace_mask))
+ +              if (cpumask_empty(to_cpumask(backtrace_mask)))
                         break;
                 mdelay(1);
         }
diff --combined arch/x86/kernel/cpu/common.c

index c1afa990a6c84bf49229a9c50052b063b9731236,3192f22f2fddc9335c3d987fe83ffdb0ab0c3c99..20399b7b0c3f1a4e3a3c9d5e66a92d952d93f028
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -61,7 -61,7 +61,7 @@@ void __init setup_cpu_local_masks(void
   static void __cpuinit default_init(struct cpuinfo_x86 *c)
   {
   #ifdef CONFIG_X86_64
- -      display_cacheinfo(c);
+ +      cpu_detect_cache_sizes(c);
   #else
         /* Not much we can do here... */
         /* Check if at least it has cpuid */
@@@ -383,7 -383,7 +383,7 @@@ static void __cpuinit get_model_name(st
         }
   }
   
- -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+ +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
   {
         unsigned int n, dummy, ebx, ecx, edx, l2size;
   
@@@ -391,6 -391,8 +391,6 @@@
   
         if (n >= 0x80000005) {
                 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- -              printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- -                              edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
                 c->x86_cache_size = (ecx>>24) + (edx>>24);
   #ifdef CONFIG_X86_64
                 /* On K8 L1 TLB is inclusive, so don't count it */
@@@ -420,6 -422,9 +420,6 @@@
   #endif
   
         c->x86_cache_size = l2size;
- -
- -      printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- -                      l2size, ecx & 0xFF);
   }
   
   void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@@ -654,31 -659,24 +654,31 @@@ void __init early_cpu_init(void
         const struct cpu_dev *const *cdev;
         int count = 0;
   
+ +#ifdef PROCESSOR_SELECT
         printk(KERN_INFO "KERNEL supported cpus:\n");
+ +#endif
+ +
         for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
                 const struct cpu_dev *cpudev = *cdev;
- -              unsigned int j;
   
                 if (count >= X86_VENDOR_NUM)
                         break;
                 cpu_devs[count] = cpudev;
                 count++;
   
- -              for (j = 0; j < 2; j++) {
- -                      if (!cpudev->c_ident[j])
- -                              continue;
- -                      printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
- -                              cpudev->c_ident[j]);
+ +#ifdef PROCESSOR_SELECT
+ +              {
+ +                      unsigned int j;
+ +
+ +                      for (j = 0; j < 2; j++) {
+ +                              if (!cpudev->c_ident[j])
+ +                                      continue;
+ +                              printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+ +                                      cpudev->c_ident[j]);
+ +                      }
                 }
+ +#endif
         }
- -
         early_identify_cpu(&boot_cpu_data);
   }
   
@@@ -839,8 -837,10 +839,8 @@@ static void __cpuinit identify_cpu(stru
                         boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
         }
   
- -#ifdef CONFIG_X86_MCE
         /* Init Machine Check Exception if available. */
- -      mcheck_init(c);
- -#endif
+ +      mcheck_cpu_init(c);
   
         select_idle_routine(c);
   
@@@ -1093,7 -1093,7 +1093,7 @@@ static void clear_all_debug_regs(void
   
   void __cpuinit cpu_init(void)
   {
-       struct orig_ist *orig_ist;
+       struct orig_ist *oist;
         struct task_struct *me;
         struct tss_struct *t;
         unsigned long v;
@@@ -1102,7 -1102,7 +1102,7 @@@
   
         cpu = stack_smp_processor_id();
         t = &per_cpu(init_tss, cpu);
-       orig_ist = &per_cpu(orig_ist, cpu);
+       oist = &per_cpu(orig_ist, cpu);
   
   #ifdef CONFIG_NUMA
         if (cpu != 0 && percpu_read(node_number) == 0 &&
@@@ -1136,19 -1136,19 +1136,19 @@@
         wrmsrl(MSR_KERNEL_GS_BASE, 0);
         barrier();
   
- -      check_efer();
+ +      x86_configure_nx();
         if (cpu != 0)
                 enable_x2apic();
   
         /*
          * set up and load the per-CPU TSS
          */
-       if (!orig_ist->ist[0]) {
+       if (!oist->ist[0]) {
                 char *estacks = per_cpu(exception_stacks, cpu);
   
                 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                         estacks += exception_stack_sizes[v];
-                       orig_ist->ist[v] = t->x86_tss.ist[v] =
+                       oist->ist[v] = t->x86_tss.ist[v] =
                                         (unsigned long)estacks;
                 }
         }
diff --combined arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c

index d2e7c77c1ea4901a8e3e6b2a70c99b5c991132f3,43eb3465dda73bb7314ca3c45f2f007c7e4d28cf..f28decf8dde3990626f493e5d962bfea9d48e59f
--- 1/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
--- 2/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@@ -68,9 -68,9 +68,9 @@@ struct acpi_cpufreq_data 
         unsigned int cpu_feature;
   };
   
- static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
   
- static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+ static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
   
   /* acpi_perf_data is a pointer to percpu data. */
   static struct acpi_processor_performance *acpi_perf_data;
@@@ -214,14 -214,14 +214,14 @@@ static u32 get_cur_val(const struct cpu
         if (unlikely(cpumask_empty(mask)))
                 return 0;
   
-       switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+       switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
         case SYSTEM_INTEL_MSR_CAPABLE:
                 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
                 cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
                 break;
         case SYSTEM_IO_CAPABLE:
                 cmd.type = SYSTEM_IO_CAPABLE;
-               perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+               perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
                 cmd.addr.io.port = perf->control_register.address;
                 cmd.addr.io.bit_width = perf->control_register.bit_width;
                 break;
@@@ -268,8 -268,8 +268,8 @@@ static unsigned int get_measured_perf(s
         if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
                 return 0;
   
-       ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
-       per_cpu(old_perf, cpu) = perf;
+       ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+       per_cpu(acfreq_old_perf, cpu) = perf;
   
         retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
   
@@@ -278,7 -278,7 +278,7 @@@
   
   static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
   {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
         unsigned int freq;
         unsigned int cached_freq;
   
@@@ -322,7 -322,7 +322,7 @@@ static unsigned int check_freqs(const s
   static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                                unsigned int target_freq, unsigned int relation)
   {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
         struct acpi_processor_performance *perf;
         struct cpufreq_freqs freqs;
         struct drv_cmd cmd;
@@@ -416,7 -416,7 +416,7 @@@ out
   
   static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
   {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
   
         dprintk("acpi_cpufreq_verify\n");
   
@@@ -526,21 -526,15 +526,21 @@@ static const struct dmi_system_id sw_an
   
   static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
   {
- -      /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+ +      /* Intel Xeon Processor 7100 Series Specification Update
+ +       * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
          * AL30: A Machine Check Exception (MCE) Occurring during an
          * Enhanced Intel SpeedStep Technology Ratio Change May Cause
- -       * Both Processor Cores to Lock Up when HT is enabled*/
+ +       * Both Processor Cores to Lock Up. */
         if (c->x86_vendor == X86_VENDOR_INTEL) {
                 if ((c->x86 == 15) &&
                     (c->x86_model == 6) &&
- -                  (c->x86_mask == 8) && smt_capable())
+ +                  (c->x86_mask == 8)) {
+ +                      printk(KERN_INFO "acpi-cpufreq: Intel(R) "
+ +                          "Xeon(R) 7100 Errata AL30, processors may "
+ +                          "lock up on frequency changes: disabling "
+ +                          "acpi-cpufreq.\n");
                         return -ENODEV;
+ +                  }
                 }
         return 0;
   }
@@@ -555,18 -549,13 +555,18 @@@ static int acpi_cpufreq_cpu_init(struc
         unsigned int result = 0;
         struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
         struct acpi_processor_performance *perf;
+ +#ifdef CONFIG_SMP
+ +      static int blacklisted;
+ +#endif
   
         dprintk("acpi_cpufreq_cpu_init\n");
   
   #ifdef CONFIG_SMP
- -      result = acpi_cpufreq_blacklist(c);
- -      if (result)
- -              return result;
+ +      if (blacklisted)
+ +              return blacklisted;
+ +      blacklisted = acpi_cpufreq_blacklist(c);
+ +      if (blacklisted)
+ +              return blacklisted;
   #endif
   
         data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
@@@ -574,7 -563,7 +574,7 @@@
                 return -ENOMEM;
   
         data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-       per_cpu(drv_data, cpu) = data;
+       per_cpu(acfreq_data, cpu) = data;
   
         if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
                 acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@@ -725,20 -714,20 +725,20 @@@ err_unreg
         acpi_processor_unregister_performance(perf, cpu);
   err_free:
         kfree(data);
-       per_cpu(drv_data, cpu) = NULL;
+       per_cpu(acfreq_data, cpu) = NULL;
   
         return result;
   }
   
   static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
   {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
   
         dprintk("acpi_cpufreq_cpu_exit\n");
   
         if (data) {
                 cpufreq_frequency_table_put_attr(policy->cpu);
-               per_cpu(drv_data, policy->cpu) = NULL;
+               per_cpu(acfreq_data, policy->cpu) = NULL;
                 acpi_processor_unregister_performance(data->acpi_data,
                                                       policy->cpu);
                 kfree(data);
@@@ -749,7 -738,7 +749,7 @@@
   
   static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
   {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
   
         dprintk("acpi_cpufreq_resume\n");
   
@@@ -764,15 -753,14 +764,15 @@@ static struct freq_attr *acpi_cpufreq_a
   };
   
   static struct cpufreq_driver acpi_cpufreq_driver = {
- -      .verify = acpi_cpufreq_verify,
- -      .target = acpi_cpufreq_target,
- -      .init = acpi_cpufreq_cpu_init,
- -      .exit = acpi_cpufreq_cpu_exit,
- -      .resume = acpi_cpufreq_resume,
- -      .name = "acpi-cpufreq",
- -      .owner = THIS_MODULE,
- -      .attr = acpi_cpufreq_attr,
+ +      .verify         = acpi_cpufreq_verify,
+ +      .target         = acpi_cpufreq_target,
+ +      .bios_limit     = acpi_processor_get_bios_limit,
+ +      .init           = acpi_cpufreq_cpu_init,
+ +      .exit           = acpi_cpufreq_cpu_exit,
+ +      .resume         = acpi_cpufreq_resume,
+ +      .name           = "acpi-cpufreq",
+ +      .owner          = THIS_MODULE,
+ +      .attr           = acpi_cpufreq_attr,
   };
   
   static int __init acpi_cpufreq_init(void)
diff --combined arch/x86/kernel/cpu/intel_cacheinfo.c

index 6c40f6b5b340b031192232bdc946d62c7076d0cc,f5ccb4fa5a5d7d00ed1a4a8dce0db5a7a1cabd44..0c06bca2a1dcc1dc68003aa0bdde3935b254967d
--- 1/arch/x86/kernel/cpu/intel_cacheinfo.c
--- 2/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@@ -94,7 -94,7 +94,7 @@@ static const struct _cache_table __cpui
         { 0xd1, LVL_3,    1024 },       /* 4-way set assoc, 64 byte line size */
         { 0xd2, LVL_3,    2048 },       /* 4-way set assoc, 64 byte line size */
         { 0xd6, LVL_3,    1024 },       /* 8-way set assoc, 64 byte line size */
- -      { 0xd7, LVL_3,    2038 },       /* 8-way set assoc, 64 byte line size */
+ +      { 0xd7, LVL_3,    2048 },       /* 8-way set assoc, 64 byte line size */
         { 0xd8, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
         { 0xdc, LVL_3,    2048 },       /* 12-way set assoc, 64 byte line size */
         { 0xdd, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
@@@ -102,9 -102,6 +102,9 @@@
         { 0xe2, LVL_3,    2048 },       /* 16-way set assoc, 64 byte line size */
         { 0xe3, LVL_3,    4096 },       /* 16-way set assoc, 64 byte line size */
         { 0xe4, LVL_3,    8192 },       /* 16-way set assoc, 64 byte line size */
+ +      { 0xea, LVL_3,    12288 },      /* 24-way set assoc, 64 byte line size */
+ +      { 0xeb, LVL_3,    18432 },      /* 24-way set assoc, 64 byte line size */
+ +      { 0xec, LVL_3,    24576 },      /* 24-way set assoc, 64 byte line size */
         { 0x00, 0, 0}
   };
   
@@@ -491,6 -488,22 +491,6 @@@ unsigned int __cpuinit init_intel_cache
   #endif
         }
   
- -      if (trace)
- -              printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
- -      else if (l1i)
- -              printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
- -
- -      if (l1d)
- -              printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
- -      else
- -              printk(KERN_CONT "\n");
- -
- -      if (l2)
- -              printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
- -
- -      if (l3)
- -              printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
- -
         c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
   
         return l2;
@@@ -499,8 -512,8 +499,8 @@@
   #ifdef CONFIG_SYSFS
   
   /* pointer to _cpuid4_info array (for each cache leaf) */
- static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
- #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
+ #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
   
   #ifdef CONFIG_SMP
   static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@@ -513,7 -526,7 +513,7 @@@
         if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
                 struct cpuinfo_x86 *d;
                 for_each_online_cpu(i) {
-                       if (!per_cpu(cpuid4_info, i))
+                       if (!per_cpu(ici_cpuid4_info, i))
                                 continue;
                         d = &cpu_data(i);
                         this_leaf = CPUID4_INFO_IDX(i, index);
@@@ -535,7 -548,7 +535,7 @@@
                             c->apicid >> index_msb) {
                                 cpumask_set_cpu(i,
                                         to_cpumask(this_leaf->shared_cpu_map));
-                               if (i != cpu && per_cpu(cpuid4_info, i))  {
+                               if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
                                         sibling_leaf =
                                                 CPUID4_INFO_IDX(i, index);
                                         cpumask_set_cpu(cpu, to_cpumask(
@@@ -574,8 -587,8 +574,8 @@@ static void __cpuinit free_cache_attrib
         for (i = 0; i < num_cache_leaves; i++)
                 cache_remove_shared_cpu_map(cpu, i);
   
-       kfree(per_cpu(cpuid4_info, cpu));
-       per_cpu(cpuid4_info, cpu) = NULL;
+       kfree(per_cpu(ici_cpuid4_info, cpu));
+       per_cpu(ici_cpuid4_info, cpu) = NULL;
   }
   
   static int
@@@ -614,15 -627,15 +614,15 @@@ static int __cpuinit detect_cache_attri
         if (num_cache_leaves == 0)
                 return -ENOENT;
   
-       per_cpu(cpuid4_info, cpu) = kzalloc(
+       per_cpu(ici_cpuid4_info, cpu) = kzalloc(
             sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-       if (per_cpu(cpuid4_info, cpu) == NULL)
+       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                 return -ENOMEM;
   
         smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
         if (retval) {
-               kfree(per_cpu(cpuid4_info, cpu));
-               per_cpu(cpuid4_info, cpu) = NULL;
+               kfree(per_cpu(ici_cpuid4_info, cpu));
+               per_cpu(ici_cpuid4_info, cpu) = NULL;
         }
   
         return retval;
@@@ -634,7 -647,7 +634,7 @@@
   extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
   
   /* pointer to kobject for cpuX/cache */
- static DEFINE_PER_CPU(struct kobject *, cache_kobject);
+ static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
   
   struct _index_kobject {
         struct kobject kobj;
@@@ -643,8 -656,8 +643,8 @@@
   };
   
   /* pointer to array of kobjects for cpuX/cache/indexY */
- static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
- #define INDEX_KOBJECT_PTR(x, y)               (&((per_cpu(index_kobject, x))[y]))
+ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
+ #define INDEX_KOBJECT_PTR(x, y)               (&((per_cpu(ici_index_kobject, x))[y]))
   
   #define show_one_plus(file_name, object, val)                         \
   static ssize_t show_##file_name                                               \
@@@ -863,10 -876,10 +863,10 @@@ static struct kobj_type ktype_percpu_en
   
   static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
   {
-       kfree(per_cpu(cache_kobject, cpu));
-       kfree(per_cpu(index_kobject, cpu));
-       per_cpu(cache_kobject, cpu) = NULL;
-       per_cpu(index_kobject, cpu) = NULL;
+       kfree(per_cpu(ici_cache_kobject, cpu));
+       kfree(per_cpu(ici_index_kobject, cpu));
+       per_cpu(ici_cache_kobject, cpu) = NULL;
+       per_cpu(ici_index_kobject, cpu) = NULL;
         free_cache_attributes(cpu);
   }
   
@@@ -882,14 -895,14 +882,14 @@@ static int __cpuinit cpuid4_cache_sysfs
                 return err;
   
         /* Allocate all required memory */
-       per_cpu(cache_kobject, cpu) =
+       per_cpu(ici_cache_kobject, cpu) =
                 kzalloc(sizeof(struct kobject), GFP_KERNEL);
-       if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
+       if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
                 goto err_out;
   
-       per_cpu(index_kobject, cpu) = kzalloc(
+       per_cpu(ici_index_kobject, cpu) = kzalloc(
             sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
-       if (unlikely(per_cpu(index_kobject, cpu) == NULL))
+       if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
                 goto err_out;
   
         return 0;
@@@ -913,7 -926,7 +913,7 @@@ static int __cpuinit cache_add_dev(stru
         if (unlikely(retval < 0))
                 return retval;
   
-       retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+       retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
                                       &ktype_percpu_entry,
                                       &sys_dev->kobj, "%s", "cache");
         if (retval < 0) {
@@@ -927,12 -940,12 +927,12 @@@
                 this_object->index = i;
                 retval = kobject_init_and_add(&(this_object->kobj),
                                               &ktype_cache,
-                                             per_cpu(cache_kobject, cpu),
+                                             per_cpu(ici_cache_kobject, cpu),
                                               "index%1lu", i);
                 if (unlikely(retval)) {
                         for (j = 0; j < i; j++)
                                 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
-                       kobject_put(per_cpu(cache_kobject, cpu));
+                       kobject_put(per_cpu(ici_cache_kobject, cpu));
                         cpuid4_cache_sysfs_exit(cpu);
                         return retval;
                 }
@@@ -940,7 -953,7 +940,7 @@@
         }
         cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
   
-       kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
+       kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
         return 0;
   }
   
@@@ -949,7 -962,7 +949,7 @@@ static void __cpuinit cache_remove_dev(
         unsigned int cpu = sys_dev->id;
         unsigned long i;
   
-       if (per_cpu(cpuid4_info, cpu) == NULL)
+       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                 return;
         if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
                 return;
@@@ -957,7 -970,7 +957,7 @@@
   
         for (i = 0; i < num_cache_leaves; i++)
                 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
-       kobject_put(per_cpu(cache_kobject, cpu));
+       kobject_put(per_cpu(ici_cache_kobject, cpu));
         cpuid4_cache_sysfs_exit(cpu);
   }
   
diff --combined arch/x86/kvm/svm.c

index 3de0b37ec038673c3a70b4f14be7dcd5656dfcfe,6c79a14a3b6f8c784c6f6118ece14fd4e2bdd6b3..1d9b33843c80ef521dc059697285cfed06cfd7d7
--- 1/arch/x86/kvm/svm.c
--- 2/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@@ -46,7 -46,6 +46,7 @@@ MODULE_LICENSE("GPL")
   #define SVM_FEATURE_NPT  (1 << 0)
   #define SVM_FEATURE_LBRV (1 << 1)
   #define SVM_FEATURE_SVML (1 << 2)
+ +#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
   
   #define NESTED_EXIT_HOST      0       /* Exit handled on host level */
   #define NESTED_EXIT_DONE      1       /* Exit caused nested vmexit  */
@@@ -54,6 -53,15 +54,6 @@@
   
   #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
   
- -/* Turn on to get debugging output*/
- -/* #define NESTED_DEBUG */
- -
- -#ifdef NESTED_DEBUG
- -#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
- -#else
- -#define nsvm_printk(fmt, args...) do {} while(0)
- -#endif
- -
   static const u32 host_save_user_msrs[] = {
   #ifdef CONFIG_X86_64
         MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@@ -77,9 -85,6 +77,9 @@@ struct nested_state 
         /* gpa pointers to the real vectors */
         u64 vmcb_msrpm;
   
+ +      /* A VMEXIT is required but not yet emulated */
+ +      bool exit_required;
+ +
         /* cache for intercepts of the guest */
         u16 intercept_cr_read;
         u16 intercept_cr_write;
@@@ -107,8 -112,6 +107,8 @@@ struct vcpu_svm 
         u32 *msrpm;
   
         struct nested_state nested;
+ +
+ +      bool nmi_singlestep;
   };
   
   /* enable NPT for AMD64 and X86 with PAE */
@@@ -283,7 -286,7 +283,7 @@@ static void skip_emulated_instruction(s
         struct vcpu_svm *svm = to_svm(vcpu);
   
         if (!svm->next_rip) {
- -              if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+ +              if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
                                 EMULATE_DONE)
                         printk(KERN_DEBUG "%s: NOP\n", __func__);
                 return;
@@@ -313,81 -316,74 +313,79 @@@ static void svm_hardware_disable(void *
         cpu_svm_disable();
   }
   
- -static void svm_hardware_enable(void *garbage)
+ +static int svm_hardware_enable(void *garbage)
   {
   
-       struct svm_cpu_data *svm_data;
+       struct svm_cpu_data *sd;
         uint64_t efer;
         struct descriptor_table gdt_descr;
         struct desc_struct *gdt;
         int me = raw_smp_processor_id();
   
+ +      rdmsrl(MSR_EFER, efer);
+ +      if (efer & EFER_SVME)
+ +              return -EBUSY;
+ +
         if (!has_svm()) {
- -              printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
- -              return;
+ +              printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+ +                     me);
+ +              return -EINVAL;
         }
-       svm_data = per_cpu(svm_data, me);
+       sd = per_cpu(svm_data, me);
   
-       if (!svm_data) {
+       if (!sd) {
- -              printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+ +              printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
                        me);
- -              return;
+ +              return -EINVAL;
         }
   
-       svm_data->asid_generation = 1;
-       svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
-       svm_data->next_asid = svm_data->max_asid + 1;
+       sd->asid_generation = 1;
+       sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
+       sd->next_asid = sd->max_asid + 1;
   
         kvm_get_gdt(&gdt_descr);
         gdt = (struct desc_struct *)gdt_descr.base;
-       svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
+       sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
   
- -      rdmsrl(MSR_EFER, efer);
         wrmsrl(MSR_EFER, efer | EFER_SVME);
   
--      wrmsrl(MSR_VM_HSAVE_PA,
-              page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
- -             page_to_pfn(sd->save_area) << PAGE_SHIFT);
++      wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+ +
+ +      return 0;
   }
   
   static void svm_cpu_uninit(int cpu)
   {
-       struct svm_cpu_data *svm_data
-               = per_cpu(svm_data, raw_smp_processor_id());
+       struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
   
-       if (!svm_data)
+       if (!sd)
                 return;
   
         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-       __free_page(svm_data->save_area);
-       kfree(svm_data);
+       __free_page(sd->save_area);
+       kfree(sd);
   }
   
   static int svm_cpu_init(int cpu)
   {
-       struct svm_cpu_data *svm_data;
+       struct svm_cpu_data *sd;
         int r;
   
-       svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-       if (!svm_data)
+       sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
+       if (!sd)
                 return -ENOMEM;
-       svm_data->cpu = cpu;
-       svm_data->save_area = alloc_page(GFP_KERNEL);
+       sd->cpu = cpu;
+       sd->save_area = alloc_page(GFP_KERNEL);
         r = -ENOMEM;
-       if (!svm_data->save_area)
+       if (!sd->save_area)
                 goto err_1;
   
-       per_cpu(svm_data, cpu) = svm_data;
+       per_cpu(svm_data, cpu) = sd;
   
         return 0;
   
   err_1:
-       kfree(svm_data);
+       kfree(sd);
         return r;
   
   }
@@@ -479,7 -475,7 +477,7 @@@ static __init int svm_hardware_setup(vo
                 kvm_enable_efer_bits(EFER_SVME);
         }
   
- -      for_each_online_cpu(cpu) {
+ +      for_each_possible_cpu(cpu) {
                 r = svm_cpu_init(cpu);
                 if (r)
                         goto err;
@@@ -513,7 -509,7 +511,7 @@@ static __exit void svm_hardware_unsetup
   {
         int cpu;
   
- -      for_each_online_cpu(cpu)
+ +      for_each_possible_cpu(cpu)
                 svm_cpu_uninit(cpu);
   
         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@@ -628,12 -624,11 +626,12 @@@ static void init_vmcb(struct vcpu_svm *
         save->rip = 0x0000fff0;
         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
   
- -      /*
- -       * cr0 val on cpu init should be 0x60000010, we enable cpu
- -       * cache by default. the orderly way is to enable cache in bios.
+ +      /* This is the guest-visible cr0 value.
+ +       * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
          */
- -      save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+ +      svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+ +      kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+ +
         save->cr4 = X86_CR4_PAE;
         /* rdx = ?? */
   
@@@ -648,6 -643,8 +646,6 @@@
                 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
                                                  INTERCEPT_CR3_MASK);
                 save->g_pat = 0x0007040600070406ULL;
- -              /* enable caching because the QEMU Bios doesn't enable it */
- -              save->cr0 = X86_CR0_ET;
                 save->cr3 = 0;
                 save->cr4 = 0;
         }
@@@ -656,11 -653,6 +654,11 @@@
         svm->nested.vmcb = 0;
         svm->vcpu.arch.hflags = 0;
   
+ +      if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+ +              control->pause_filter_count = 3000;
+ +              control->intercept |= (1ULL << INTERCEPT_PAUSE);
+ +      }
+ +
         enable_gif(svm);
   }
   
@@@ -765,16 -757,15 +763,16 @@@ static void svm_vcpu_load(struct kvm_vc
         int i;
   
         if (unlikely(cpu != vcpu->cpu)) {
- -              u64 tsc_this, delta;
+ +              u64 delta;
   
                 /*
                  * Make sure that the guest sees a monotonically
                  * increasing TSC.
                  */
- -              rdtscll(tsc_this);
- -              delta = vcpu->arch.host_tsc - tsc_this;
+ +              delta = vcpu->arch.host_tsc - native_read_tsc();
                 svm->vmcb->control.tsc_offset += delta;
+ +              if (is_nested(svm))
+ +                      svm->nested.hsave->control.tsc_offset += delta;
                 vcpu->cpu = cpu;
                 kvm_migrate_timers(vcpu);
                 svm->asid_generation = 0;
@@@ -793,7 -784,7 +791,7 @@@ static void svm_vcpu_put(struct kvm_vcp
         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
   
- -      rdtscll(vcpu->arch.host_tsc);
+ +      vcpu->arch.host_tsc = native_read_tsc();
   }
   
   static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@@ -1051,7 -1042,7 +1049,7 @@@ static void update_db_intercept(struct 
         svm->vmcb->control.intercept_exceptions &=
                 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
   
- -      if (vcpu->arch.singlestep)
+ +      if (svm->nmi_singlestep)
                 svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
   
         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@@ -1066,16 -1057,26 +1064,16 @@@
                 vcpu->guest_debug = 0;
   }
   
- -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+ +static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
   {
- -      int old_debug = vcpu->guest_debug;
         struct vcpu_svm *svm = to_svm(vcpu);
   
- -      vcpu->guest_debug = dbg->control;
- -
- -      update_db_intercept(vcpu);
- -
         if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
                 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
         else
                 svm->vmcb->save.dr7 = vcpu->arch.dr7;
   
- -      if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- -              svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- -      else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
- -              svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- -
- -      return 0;
+ +      update_db_intercept(vcpu);
   }
   
   static void load_host_msrs(struct kvm_vcpu *vcpu)
@@@ -1092,16 -1093,16 +1090,16 @@@ static void save_host_msrs(struct kvm_v
   #endif
   }
   
- static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
+ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
   {
-       if (svm_data->next_asid > svm_data->max_asid) {
-               ++svm_data->asid_generation;
-               svm_data->next_asid = 1;
+       if (sd->next_asid > sd->max_asid) {
+               ++sd->asid_generation;
+               sd->next_asid = 1;
                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
         }
   
-       svm->asid_generation = svm_data->asid_generation;
-       svm->vmcb->control.asid = svm_data->next_asid++;
+       svm->asid_generation = sd->asid_generation;
+       svm->vmcb->control.asid = sd->next_asid++;
   }
   
   static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
@@@ -1176,7 -1177,7 +1174,7 @@@ static void svm_set_dr(struct kvm_vcpu 
         }
   }
   
- -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int pf_interception(struct vcpu_svm *svm)
   {
         u64 fault_address;
         u32 error_code;
@@@ -1190,19 -1191,17 +1188,19 @@@
         return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
   }
   
- -static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int db_interception(struct vcpu_svm *svm)
   {
+ +      struct kvm_run *kvm_run = svm->vcpu.run;
+ +
         if (!(svm->vcpu.guest_debug &
               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
- -              !svm->vcpu.arch.singlestep) {
+ +              !svm->nmi_singlestep) {
                 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
                 return 1;
         }
   
- -      if (svm->vcpu.arch.singlestep) {
- -              svm->vcpu.arch.singlestep = false;
+ +      if (svm->nmi_singlestep) {
+ +              svm->nmi_singlestep = false;
                 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
                         svm->vmcb->save.rflags &=
                                 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@@ -1221,27 -1220,25 +1219,27 @@@
         return 1;
   }
   
- -static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int bp_interception(struct vcpu_svm *svm)
   {
+ +      struct kvm_run *kvm_run = svm->vcpu.run;
+ +
         kvm_run->exit_reason = KVM_EXIT_DEBUG;
         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
         kvm_run->debug.arch.exception = BP_VECTOR;
         return 0;
   }
   
- -static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int ud_interception(struct vcpu_svm *svm)
   {
         int er;
   
- -      er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+ +      er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
         if (er != EMULATE_DONE)
                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
         return 1;
   }
   
- -static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int nm_interception(struct vcpu_svm *svm)
   {
         svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
         if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@@ -1251,7 -1248,7 +1249,7 @@@
         return 1;
   }
   
- -static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int mc_interception(struct vcpu_svm *svm)
   {
         /*
          * On an #MC intercept the MCE handler is not called automatically in
@@@ -1264,10 -1261,8 +1262,10 @@@
         return 1;
   }
   
- -static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int shutdown_interception(struct vcpu_svm *svm)
   {
+ +      struct kvm_run *kvm_run = svm->vcpu.run;
+ +
         /*
          * VMCB is undefined after a SHUTDOWN intercept
          * so reinitialize it.
@@@ -1279,7 -1274,7 +1277,7 @@@
         return 0;
   }
   
- -static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int io_interception(struct vcpu_svm *svm)
   {
         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
         int size, in, string;
@@@ -1293,7 -1288,7 +1291,7 @@@
   
         if (string) {
                 if (emulate_instruction(&svm->vcpu,
- -                                      kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+ +                                      0, 0, 0) == EMULATE_DO_MMIO)
                         return 0;
                 return 1;
         }
@@@ -1303,33 -1298,33 +1301,33 @@@
         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
   
         skip_emulated_instruction(&svm->vcpu);
- -      return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+ +      return kvm_emulate_pio(&svm->vcpu, in, size, port);
   }
   
- -static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int nmi_interception(struct vcpu_svm *svm)
   {
         return 1;
   }
   
- -static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int intr_interception(struct vcpu_svm *svm)
   {
         ++svm->vcpu.stat.irq_exits;
         return 1;
   }
   
- -static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int nop_on_interception(struct vcpu_svm *svm)
   {
         return 1;
   }
   
- -static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int halt_interception(struct vcpu_svm *svm)
   {
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
         skip_emulated_instruction(&svm->vcpu);
         return kvm_emulate_halt(&svm->vcpu);
   }
   
- -static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int vmmcall_interception(struct vcpu_svm *svm)
   {
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
         skip_emulated_instruction(&svm->vcpu);
@@@ -1380,15 -1375,8 +1378,15 @@@ static inline int nested_svm_intr(struc
   
         svm->vmcb->control.exit_code = SVM_EXIT_INTR;
   
- -      if (nested_svm_exit_handled(svm)) {
- -              nsvm_printk("VMexit -> INTR\n");
+ +      if (svm->nested.intercept & 1ULL) {
+ +              /*
+ +               * The #vmexit can't be emulated here directly because this
+ +               * code path runs with irqs and preemption disabled. A
+ +               * #vmexit emulation might sleep. Only signal request for
+ +               * the #vmexit here.
+ +               */
+ +              svm->nested.exit_required = true;
+ +              trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
                 return 1;
         }
   
@@@ -1399,7 -1387,10 +1397,7 @@@ static void *nested_svm_map(struct vcpu
   {
         struct page *page;
   
- -      down_read(&current->mm->mmap_sem);
         page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
- -      up_read(&current->mm->mmap_sem);
- -
         if (is_error_page(page))
                 goto error;
   
@@@ -1538,12 -1529,14 +1536,12 @@@ static int nested_svm_exit_handled(stru
         }
         default: {
                 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- -              nsvm_printk("exit code: 0x%x\n", exit_code);
                 if (svm->nested.intercept & exit_bits)
                         vmexit = NESTED_EXIT_DONE;
         }
         }
   
         if (vmexit == NESTED_EXIT_DONE) {
- -              nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
                 nested_svm_vmexit(svm);
         }
   
@@@ -1588,12 -1581,6 +1586,12 @@@ static int nested_svm_vmexit(struct vcp
         struct vmcb *hsave = svm->nested.hsave;
         struct vmcb *vmcb = svm->vmcb;
   
+ +      trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ +                                     vmcb->control.exit_info_1,
+ +                                     vmcb->control.exit_info_2,
+ +                                     vmcb->control.exit_int_info,
+ +                                     vmcb->control.exit_int_info_err);
+ +
         nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
         if (!nested_vmcb)
                 return 1;
@@@ -1627,22 -1614,6 +1625,22 @@@
         nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
         nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
         nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+ +
+ +      /*
+ +       * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ +       * to make sure that we do not lose injected events. So check event_inj
+ +       * here and copy it to exit_int_info if it is valid.
+ +       * Exit_int_info and event_inj can't be both valid because the case
+ +       * below only happens on a VMRUN instruction intercept which has
+ +       * no valid exit_int_info set.
+ +       */
+ +      if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ +              struct vmcb_control_area *nc = &nested_vmcb->control;
+ +
+ +              nc->exit_int_info     = vmcb->control.event_inj;
+ +              nc->exit_int_info_err = vmcb->control.event_inj_err;
+ +      }
+ +
         nested_vmcb->control.tlb_ctl           = 0;
         nested_vmcb->control.event_inj         = 0;
         nested_vmcb->control.event_inj_err     = 0;
@@@ -1654,6 -1625,10 +1652,6 @@@
         /* Restore the original control entries */
         copy_vmcb_control_area(vmcb, hsave);
   
- -      /* Kill any pending exceptions */
- -      if (svm->vcpu.arch.exception.pending == true)
- -              nsvm_printk("WARNING: Pending Exception\n");
- -
         kvm_clear_exception_queue(&svm->vcpu);
         kvm_clear_interrupt_queue(&svm->vcpu);
   
@@@ -1724,12 -1699,6 +1722,12 @@@ static bool nested_svm_vmrun(struct vcp
         /* nested_vmcb is our indicator if nested SVM is activated */
         svm->nested.vmcb = svm->vmcb->save.rax;
   
+ +      trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ +                             nested_vmcb->save.rip,
+ +                             nested_vmcb->control.int_ctl,
+ +                             nested_vmcb->control.event_inj,
+ +                             nested_vmcb->control.nested_ctl);
+ +
         /* Clear internal status */
         kvm_clear_exception_queue(&svm->vcpu);
         kvm_clear_interrupt_queue(&svm->vcpu);
@@@ -1817,15 -1786,28 +1815,15 @@@
         svm->nested.intercept            = nested_vmcb->control.intercept;
   
         force_new_asid(&svm->vcpu);
- -      svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
- -      svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- -      if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- -              nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- -                              nested_vmcb->control.int_ctl);
- -      }
         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
         else
                 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
   
- -      nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- -                      nested_vmcb->control.exit_int_info,
- -                      nested_vmcb->control.int_state);
- -
         svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
         svm->vmcb->control.int_state = nested_vmcb->control.int_state;
         svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- -      if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- -              nsvm_printk("Injecting Event: 0x%x\n",
- -                              nested_vmcb->control.event_inj);
         svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
         svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
   
@@@ -1852,7 -1834,7 +1850,7 @@@ static void nested_svm_vmloadsave(struc
         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
   }
   
- -static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int vmload_interception(struct vcpu_svm *svm)
   {
         struct vmcb *nested_vmcb;
   
@@@ -1872,7 -1854,7 +1870,7 @@@
         return 1;
   }
   
- -static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int vmsave_interception(struct vcpu_svm *svm)
   {
         struct vmcb *nested_vmcb;
   
@@@ -1892,8 -1874,10 +1890,8 @@@
         return 1;
   }
   
- -static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int vmrun_interception(struct vcpu_svm *svm)
   {
- -      nsvm_printk("VMrun\n");
- -
         if (nested_svm_check_permissions(svm))
                 return 1;
   
@@@ -1920,7 -1904,7 +1918,7 @@@ failed
         return 1;
   }
   
- -static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int stgi_interception(struct vcpu_svm *svm)
   {
         if (nested_svm_check_permissions(svm))
                 return 1;
@@@ -1933,7 -1917,7 +1931,7 @@@
         return 1;
   }
   
- -static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int clgi_interception(struct vcpu_svm *svm)
   {
         if (nested_svm_check_permissions(svm))
                 return 1;
@@@ -1950,12 -1934,10 +1948,12 @@@
         return 1;
   }
   
- -static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int invlpga_interception(struct vcpu_svm *svm)
   {
         struct kvm_vcpu *vcpu = &svm->vcpu;
- -      nsvm_printk("INVLPGA\n");
+ +
+ +      trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ +                        vcpu->arch.regs[VCPU_REGS_RAX]);
   
         /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
         kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@@ -1965,21 -1947,15 +1963,21 @@@
         return 1;
   }
   
- -static int invalid_op_interception(struct vcpu_svm *svm,
- -                                 struct kvm_run *kvm_run)
+ +static int skinit_interception(struct vcpu_svm *svm)
   {
+ +      trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+ +
         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
         return 1;
   }
   
- -static int task_switch_interception(struct vcpu_svm *svm,
- -                                  struct kvm_run *kvm_run)
+ +static int invalid_op_interception(struct vcpu_svm *svm)
+ +{
+ +      kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ +      return 1;
+ +}
+ +
+ +static int task_switch_interception(struct vcpu_svm *svm)
   {
         u16 tss_selector;
         int reason;
@@@ -2029,14 -2005,14 +2027,14 @@@
         return kvm_task_switch(&svm->vcpu, tss_selector, reason);
   }
   
- -static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int cpuid_interception(struct vcpu_svm *svm)
   {
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
         kvm_emulate_cpuid(&svm->vcpu);
         return 1;
   }
   
- -static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int iret_interception(struct vcpu_svm *svm)
   {
         ++svm->vcpu.stat.nmi_window_exits;
         svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@@ -2044,27 -2020,26 +2042,27 @@@
         return 1;
   }
   
- -static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int invlpg_interception(struct vcpu_svm *svm)
   {
- -      if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+ +      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
         return 1;
   }
   
- -static int emulate_on_interception(struct vcpu_svm *svm,
- -                                 struct kvm_run *kvm_run)
+ +static int emulate_on_interception(struct vcpu_svm *svm)
   {
- -      if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+ +      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
         return 1;
   }
   
- -static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int cr8_write_interception(struct vcpu_svm *svm)
   {
+ +      struct kvm_run *kvm_run = svm->vcpu.run;
+ +
         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
         /* instruction emulation calls kvm_set_cr8() */
- -      emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+ +      emulate_instruction(&svm->vcpu, 0, 0, 0);
         if (irqchip_in_kernel(svm->vcpu.kvm)) {
                 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
                 return 1;
@@@ -2081,14 -2056,10 +2079,14 @@@ static int svm_get_msr(struct kvm_vcpu 
   
         switch (ecx) {
         case MSR_IA32_TSC: {
- -              u64 tsc;
+ +              u64 tsc_offset;
   
- -              rdtscll(tsc);
- -              *data = svm->vmcb->control.tsc_offset + tsc;
+ +              if (is_nested(svm))
+ +                      tsc_offset = svm->nested.hsave->control.tsc_offset;
+ +              else
+ +                      tsc_offset = svm->vmcb->control.tsc_offset;
+ +
+ +              *data = tsc_offset + native_read_tsc();
                 break;
         }
         case MSR_K6_STAR:
@@@ -2150,7 -2121,7 +2148,7 @@@
         return 0;
   }
   
- -static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int rdmsr_interception(struct vcpu_svm *svm)
   {
         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
         u64 data;
@@@ -2174,17 -2145,10 +2172,17 @@@ static int svm_set_msr(struct kvm_vcpu 
   
         switch (ecx) {
         case MSR_IA32_TSC: {
- -              u64 tsc;
+ +              u64 tsc_offset = data - native_read_tsc();
+ +              u64 g_tsc_offset = 0;
+ +
+ +              if (is_nested(svm)) {
+ +                      g_tsc_offset = svm->vmcb->control.tsc_offset -
+ +                                     svm->nested.hsave->control.tsc_offset;
+ +                      svm->nested.hsave->control.tsc_offset = tsc_offset;
+ +              }
+ +
+ +              svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
   
- -              rdtscll(tsc);
- -              svm->vmcb->control.tsc_offset = data - tsc;
                 break;
         }
         case MSR_K6_STAR:
@@@ -2243,7 -2207,7 +2241,7 @@@
         return 0;
   }
   
- -static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int wrmsr_interception(struct vcpu_svm *svm)
   {
         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
         u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@@ -2259,18 -2223,17 +2257,18 @@@
         return 1;
   }
   
- -static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+ +static int msr_interception(struct vcpu_svm *svm)
   {
         if (svm->vmcb->control.exit_info_1)
- -              return wrmsr_interception(svm, kvm_run);
+ +              return wrmsr_interception(svm);
         else
- -              return rdmsr_interception(svm, kvm_run);
+ +              return rdmsr_interception(svm);
   }
   
- -static int interrupt_window_interception(struct vcpu_svm *svm,
- -                                 struct kvm_run *kvm_run)
+ +static int interrupt_window_interception(struct vcpu_svm *svm)
   {
+ +      struct kvm_run *kvm_run = svm->vcpu.run;
+ +
         svm_clear_vintr(svm);
         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
         /*
@@@ -2288,13 -2251,8 +2286,13 @@@
         return 1;
   }
   
- -static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
- -                                    struct kvm_run *kvm_run) = {
+ +static int pause_interception(struct vcpu_svm *svm)
+ +{
+ +      kvm_vcpu_on_spin(&(svm->vcpu));
+ +      return 1;
+ +}
+ +
+ +static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
         [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
         [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
         [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
@@@ -2329,7 -2287,6 +2327,7 @@@
         [SVM_EXIT_CPUID]                        = cpuid_interception,
         [SVM_EXIT_IRET]                         = iret_interception,
         [SVM_EXIT_INVD]                         = emulate_on_interception,
+ +      [SVM_EXIT_PAUSE]                        = pause_interception,
         [SVM_EXIT_HLT]                          = halt_interception,
         [SVM_EXIT_INVLPG]                       = invlpg_interception,
         [SVM_EXIT_INVLPGA]                      = invlpga_interception,
@@@ -2343,36 -2300,26 +2341,36 @@@
         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
         [SVM_EXIT_STGI]                         = stgi_interception,
         [SVM_EXIT_CLGI]                         = clgi_interception,
- -      [SVM_EXIT_SKINIT]                       = invalid_op_interception,
+ +      [SVM_EXIT_SKINIT]                       = skinit_interception,
         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
         [SVM_EXIT_MWAIT]                        = invalid_op_interception,
         [SVM_EXIT_NPF]                          = pf_interception,
   };
   
- -static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+ +static int handle_exit(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
+ +      struct kvm_run *kvm_run = vcpu->run;
         u32 exit_code = svm->vmcb->control.exit_code;
   
         trace_kvm_exit(exit_code, svm->vmcb->save.rip);
   
+ +      if (unlikely(svm->nested.exit_required)) {
+ +              nested_svm_vmexit(svm);
+ +              svm->nested.exit_required = false;
+ +
+ +              return 1;
+ +      }
+ +
         if (is_nested(svm)) {
                 int vmexit;
   
- -              nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- -                          exit_code, svm->vmcb->control.exit_info_1,
- -                          svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+ +              trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ +                                      svm->vmcb->control.exit_info_1,
+ +                                      svm->vmcb->control.exit_info_2,
+ +                                      svm->vmcb->control.exit_int_info,
+ +                                      svm->vmcb->control.exit_int_info_err);
   
                 vmexit = nested_svm_exit_special(svm);
   
@@@ -2422,15 -2369,15 +2420,15 @@@
                 return 0;
         }
   
- -      return svm_exit_handlers[exit_code](svm, kvm_run);
+ +      return svm_exit_handlers[exit_code](svm);
   }
   
   static void reload_tss(struct kvm_vcpu *vcpu)
   {
         int cpu = raw_smp_processor_id();
   
-       struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
-       svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       sd->tss_desc->type = 9; /* available 32/64-bit TSS */
         load_TR_desc();
   }
   
@@@ -2438,12 -2385,12 +2436,12 @@@ static void pre_svm_run(struct vcpu_sv
   {
         int cpu = raw_smp_processor_id();
   
-       struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
   
         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
         /* FIXME: handle wraparound of asid_generation */
-       if (svm->asid_generation != svm_data->asid_generation)
-               new_asid(svm, svm_data);
+       if (svm->asid_generation != sd->asid_generation)
+               new_asid(svm, sd);
   }
   
   static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@@ -2499,47 -2446,20 +2497,47 @@@ static int svm_nmi_allowed(struct kvm_v
                 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
   }
   
+ +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+ +{
+ +      struct vcpu_svm *svm = to_svm(vcpu);
+ +
+ +      return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+ +}
+ +
+ +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+ +{
+ +      struct vcpu_svm *svm = to_svm(vcpu);
+ +
+ +      if (masked) {
+ +              svm->vcpu.arch.hflags |= HF_NMI_MASK;
+ +              svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ +      } else {
+ +              svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+ +              svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+ +      }
+ +}
+ +
   static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
         struct vmcb *vmcb = svm->vmcb;
- -      return (vmcb->save.rflags & X86_EFLAGS_IF) &&
- -              !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- -              gif_set(svm) &&
- -              !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
+ +      int ret;
+ +
+ +      if (!gif_set(svm) ||
+ +           (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+ +              return 0;
+ +
+ +      ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+ +
+ +      if (is_nested(svm))
+ +              return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+ +
+ +      return ret;
   }
   
   static void enable_irq_window(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
- -      nsvm_printk("Trying to open IRQ window\n");
   
         nested_svm_intr(svm);
   
@@@ -2564,7 -2484,7 +2562,7 @@@ static void enable_nmi_window(struct kv
         /* Something prevents NMI from been injected. Single step over
            possible problem (IRET or exception injection or interrupt
            shadow) */
- -      vcpu->arch.singlestep = true;
+ +      svm->nmi_singlestep = true;
         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
         update_db_intercept(vcpu);
   }
@@@ -2654,20 -2574,13 +2652,20 @@@ static void svm_complete_interrupts(str
   #define R "e"
   #endif
   
- -static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+ +static void svm_vcpu_run(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
         u16 fs_selector;
         u16 gs_selector;
         u16 ldt_selector;
   
+ +      /*
+ +       * A vmexit emulation is required before the vcpu can be executed
+ +       * again.
+ +       */
+ +      if (unlikely(svm->nested.exit_required))
+ +              return;
+ +
         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@@ -2966,8 -2879,6 +2964,8 @@@ static struct kvm_x86_ops svm_x86_ops 
         .queue_exception = svm_queue_exception,
         .interrupt_allowed = svm_interrupt_allowed,
         .nmi_allowed = svm_nmi_allowed,
+ +      .get_nmi_mask = svm_get_nmi_mask,
+ +      .set_nmi_mask = svm_set_nmi_mask,
         .enable_nmi_window = enable_nmi_window,
         .enable_irq_window = enable_irq_window,
         .update_cr8_intercept = update_cr8_intercept,
diff --combined arch/x86/xen/smp.c

index 64757c0ba5fc2255a4d9188b3a68636832a73a1f,1167d9830f5f9c5cf259e4fba0273b8dd424993c..563d20504988ef7671cf600cdc2762f00418a4c8
--- 1/arch/x86/xen/smp.c
--- 2/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@@ -35,10 -35,10 +35,10 @@@
   
   cpumask_var_t xen_cpu_initialized_map;
   
- static DEFINE_PER_CPU(int, resched_irq);
- static DEFINE_PER_CPU(int, callfunc_irq);
- static DEFINE_PER_CPU(int, callfuncsingle_irq);
- static DEFINE_PER_CPU(int, debug_irq) = -1;
+ static DEFINE_PER_CPU(int, xen_resched_irq);
+ static DEFINE_PER_CPU(int, xen_callfunc_irq);
+ static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+ static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
   
   static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
   static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@@ -73,7 -73,7 +73,7 @@@ static __cpuinit void cpu_bringup(void
   
         xen_setup_cpu_clockevents();
   
- -      cpu_set(cpu, cpu_online_map);
+ +      set_cpu_online(cpu, true);
         percpu_write(cpu_state, CPU_ONLINE);
         wmb();
   
@@@ -103,7 -103,7 +103,7 @@@ static int xen_smp_intr_init(unsigned i
                                     NULL);
         if (rc < 0)
                 goto fail;
-       per_cpu(resched_irq, cpu) = rc;
+       per_cpu(xen_resched_irq, cpu) = rc;
   
         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
@@@ -114,7 -114,7 +114,7 @@@
                                     NULL);
         if (rc < 0)
                 goto fail;
-       per_cpu(callfunc_irq, cpu) = rc;
+       per_cpu(xen_callfunc_irq, cpu) = rc;
   
         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
@@@ -122,7 -122,7 +122,7 @@@
                                      debug_name, NULL);
         if (rc < 0)
                 goto fail;
-       per_cpu(debug_irq, cpu) = rc;
+       per_cpu(xen_debug_irq, cpu) = rc;
   
         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
@@@ -133,19 -133,20 +133,20 @@@
                                     NULL);
         if (rc < 0)
                 goto fail;
-       per_cpu(callfuncsingle_irq, cpu) = rc;
+       per_cpu(xen_callfuncsingle_irq, cpu) = rc;
   
         return 0;
   
    fail:
-       if (per_cpu(resched_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-       if (per_cpu(callfunc_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-       if (per_cpu(debug_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-       if (per_cpu(callfuncsingle_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+       if (per_cpu(xen_resched_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+       if (per_cpu(xen_callfunc_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+       if (per_cpu(xen_debug_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+       if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
+                                      NULL);
   
         return rc;
   }
@@@ -295,7 -296,6 +296,7 @@@ static int __cpuinit xen_cpu_up(unsigne
                 (unsigned long)task_stack_page(idle) -
                 KERNEL_STACK_OFFSET + THREAD_SIZE;
   #endif
+ +      xen_setup_runstate_info(cpu);
         xen_setup_timer(cpu);
         xen_init_lock_cpu(cpu);
   
@@@ -349,10 -349,10 +350,10 @@@ static void xen_cpu_die(unsigned int cp
                 current->state = TASK_UNINTERRUPTIBLE;
                 schedule_timeout(HZ/10);
         }
-       unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
         xen_uninit_lock_cpu(cpu);
         xen_teardown_timer(cpu);
   
diff --combined arch/x86/xen/time.c

index 9d1f853120d859cfc814e0f2eaa2e01b3f1575e8,26e37b787ad30f7665a015df7a094dfc537e77a0..0d3f07cd1b5fe9aee977674b11b0beb311e25eb0
--- 1/arch/x86/xen/time.c
--- 2/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@@ -31,14 -31,14 +31,14 @@@
   #define NS_PER_TICK   (1000000000LL / HZ)
   
   /* runstate info updated by Xen */
- static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
   
   /* snapshots of runstate info */
- static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
   
   /* unused ns of stolen and blocked time */
- static DEFINE_PER_CPU(u64, residual_stolen);
- static DEFINE_PER_CPU(u64, residual_blocked);
+ static DEFINE_PER_CPU(u64, xen_residual_stolen);
+ static DEFINE_PER_CPU(u64, xen_residual_blocked);
   
   /* return an consistent snapshot of 64-bit time/counter value */
   static u64 get64(const u64 *p)
@@@ -79,7 -79,7 +79,7 @@@ static void get_runstate_snapshot(struc
   
         BUG_ON(preemptible());
   
-       state = &__get_cpu_var(runstate);
+       state = &__get_cpu_var(xen_runstate);
   
         /*
          * The runstate info is always updated by the hypervisor on
@@@ -97,14 -97,14 +97,14 @@@
   /* return true when a vcpu could run but has no real cpu to run on */
   bool xen_vcpu_stolen(int vcpu)
   {
-       return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
+       return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
   }
   
- -static void setup_runstate_info(int cpu)
+ +void xen_setup_runstate_info(int cpu)
   {
         struct vcpu_register_runstate_memory_area area;
   
-       area.addr.v = &per_cpu(runstate, cpu);
+       area.addr.v = &per_cpu(xen_runstate, cpu);
   
         if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
                                cpu, &area))
@@@ -122,7 -122,7 +122,7 @@@ static void do_stolen_accounting(void
   
         WARN_ON(state.state != RUNSTATE_running);
   
-       snap = &__get_cpu_var(runstate_snapshot);
+       snap = &__get_cpu_var(xen_runstate_snapshot);
   
         /* work out how much time the VCPU has not been runn*ing*  */
         blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
@@@ -133,24 -133,24 +133,24 @@@
   
         /* Add the appropriate number of ticks of stolen time,
            including any left-overs from last time. */
-       stolen = runnable + offline + __get_cpu_var(residual_stolen);
+       stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);
   
         if (stolen < 0)
                 stolen = 0;
   
         ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-       __get_cpu_var(residual_stolen) = stolen;
+       __get_cpu_var(xen_residual_stolen) = stolen;
         account_steal_ticks(ticks);
   
         /* Add the appropriate number of ticks of blocked time,
            including any left-overs from last time. */
-       blocked += __get_cpu_var(residual_blocked);
+       blocked += __get_cpu_var(xen_residual_blocked);
   
         if (blocked < 0)
                 blocked = 0;
   
         ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
-       __get_cpu_var(residual_blocked) = blocked;
+       __get_cpu_var(xen_residual_blocked) = blocked;
         account_idle_ticks(ticks);
   }
   
@@@ -434,7 -434,7 +434,7 @@@ void xen_setup_timer(int cpu
                 name = "<timer kasprintf failed>";
   
         irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- -                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ +                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
                                       name, NULL);
   
         evt = &per_cpu(xen_clock_events, cpu);
@@@ -442,6 -442,8 +442,6 @@@
   
         evt->cpumask = cpumask_of(cpu);
         evt->irq = irq;
- -
- -      setup_runstate_info(cpu);
   }
   
   void xen_teardown_timer(int cpu)
@@@ -492,7 -494,6 +492,7 @@@ __init void xen_time_init(void
   
         setup_force_cpu_cap(X86_FEATURE_TSC);
   
+ +      xen_setup_runstate_info(cpu);
         xen_setup_timer(cpu);
         xen_setup_cpu_clockevents();
   }
diff --combined crypto/cryptd.c

index f8ae0d94a6471e0703bfc24900f88dfeb1fc8be7,3d7fe8306e2a4377ecea748543ffccdb71d1b6cb..704c141153236917288ee7a09c12ce8b037754d8
--- 1/crypto/cryptd.c
--- 2/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@@ -99,7 -99,7 +99,7 @@@ static int cryptd_enqueue_request(struc
         struct cryptd_cpu_queue *cpu_queue;
   
         cpu = get_cpu();
-       cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+       cpu_queue = this_cpu_ptr(queue->cpu_queue);
         err = crypto_enqueue_request(&cpu_queue->queue, request);
         queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
         put_cpu();
@@@ -711,13 -711,6 +711,13 @@@ struct crypto_shash *cryptd_ahash_child
   }
   EXPORT_SYMBOL_GPL(cryptd_ahash_child);
   
+ +struct shash_desc *cryptd_shash_desc(struct ahash_request *req)
+ +{
+ +      struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+ +      return &rctx->desc;
+ +}
+ +EXPORT_SYMBOL_GPL(cryptd_shash_desc);
+ +
   void cryptd_free_ahash(struct cryptd_ahash *tfm)
   {
         crypto_free_ahash(&tfm->base);
diff --combined drivers/base/cpu.c

index 27fd775375b04965b834d7817db43efa059a2c97,69ee5b7517ecfa400a6ccbe111ccfe311d6eb7db..958bd1540c303d92f84cbcea016c7da4c0ce925c
--- 1/drivers/base/cpu.c
--- 2/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@@ -35,7 -35,6 +35,7 @@@ static ssize_t __ref store_online(struc
         struct cpu *cpu = container_of(dev, struct cpu, sysdev);
         ssize_t ret;
   
+ +      cpu_hotplug_driver_lock();
         switch (buf[0]) {
         case '0':
                 ret = cpu_down(cpu->sysdev.id);
@@@ -50,7 -49,6 +50,7 @@@
         default:
                 ret = -EINVAL;
         }
+ +      cpu_hotplug_driver_unlock();
   
         if (ret >= 0)
                 ret = count;
@@@ -74,38 -72,6 +74,38 @@@ void unregister_cpu(struct cpu *cpu
         per_cpu(cpu_sys_devices, logical_cpu) = NULL;
         return;
   }
+ +
+ +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ +static ssize_t cpu_probe_store(struct class *class, const char *buf,
+ +                             size_t count)
+ +{
+ +      return arch_cpu_probe(buf, count);
+ +}
+ +
+ +static ssize_t cpu_release_store(struct class *class, const char *buf,
+ +                               size_t count)
+ +{
+ +      return arch_cpu_release(buf, count);
+ +}
+ +
+ +static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+ +static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+ +
+ +int __init cpu_probe_release_init(void)
+ +{
+ +      int rc;
+ +
+ +      rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ +                             &class_attr_probe.attr);
+ +      if (!rc)
+ +              rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ +                                     &class_attr_release.attr);
+ +
+ +      return rc;
+ +}
+ +device_initcall(cpu_probe_release_init);
+ +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+ +
   #else /* ... !CONFIG_HOTPLUG_CPU */
   static inline void register_cpu_control(struct cpu *cpu)
   {
@@@ -131,7 -97,7 +131,7 @@@ static ssize_t show_crash_notes(struct 
          * boot up and this data does not change there after. Hence this
          * operation should be safe. No locking required.
          */
-       addr = __pa(per_cpu_ptr(crash_notes, cpunum));
+       addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));
         rc = sprintf(buf, "%Lx\n", addr);
         return rc;
   }
diff --combined drivers/cpufreq/cpufreq.c

index f20668c09ce0611d3da1fa8582514ccfec949347,af93a8175c5eee0bef7f15fe2d7de4854c88d064..67bc2ece7b4b508da7855937a01eb388cd33ab5b
--- 1/drivers/cpufreq/cpufreq.c
--- 2/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@@ -41,7 -41,7 +41,7 @@@ static struct cpufreq_driver *cpufreq_d
   static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
   #ifdef CONFIG_HOTPLUG_CPU
   /* This one keeps track of the previously set governor of a removed CPU */
- -static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
+ +static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
   #endif
   static DEFINE_SPINLOCK(cpufreq_driver_lock);
   
@@@ -64,14 -64,14 +64,14 @@@
    * - Lock should not be held across
    *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
    */
- static DEFINE_PER_CPU(int, policy_cpu);
+ static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
   static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
   
   #define lock_policy_rwsem(mode, cpu)                                  \
   int lock_policy_rwsem_##mode                                          \
   (int cpu)                                                             \
   {                                                                     \
-       int policy_cpu = per_cpu(policy_cpu, cpu);                      \
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
         BUG_ON(policy_cpu == -1);                                       \
         down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
         if (unlikely(!cpu_online(cpu))) {                               \
@@@ -90,7 -90,7 +90,7 @@@ EXPORT_SYMBOL_GPL(lock_policy_rwsem_wri
   
   void unlock_policy_rwsem_read(int cpu)
   {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
         BUG_ON(policy_cpu == -1);
         up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
   }
@@@ -98,7 -98,7 +98,7 @@@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_r
   
   void unlock_policy_rwsem_write(int cpu)
   {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
         BUG_ON(policy_cpu == -1);
         up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
   }
@@@ -647,21 -647,6 +647,21 @@@ static ssize_t show_scaling_setspeed(st
         return policy->governor->show_setspeed(policy, buf);
   }
   
+ +/**
+ + * show_bios_limit - show the current cpufreq HW/BIOS limitation
+ + */
+ +static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
+ +{
+ +      unsigned int limit;
+ +      int ret;
+ +      if (cpufreq_driver->bios_limit) {
+ +              ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+ +              if (!ret)
+ +                      return sprintf(buf, "%u\n", limit);
+ +      }
+ +      return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
+ +}
+ +
   #define define_one_ro(_name) \
   static struct freq_attr _name = \
   __ATTR(_name, 0444, show_##_name, NULL)
@@@ -681,7 -666,6 +681,7 @@@ define_one_ro(cpuinfo_transition_latenc
   define_one_ro(scaling_available_governors);
   define_one_ro(scaling_driver);
   define_one_ro(scaling_cur_freq);
+ +define_one_ro(bios_limit);
   define_one_ro(related_cpus);
   define_one_ro(affected_cpus);
   define_one_rw(scaling_min_freq);
@@@ -783,20 -767,17 +783,20 @@@ static struct kobj_type ktype_cpufreq 
    *   0:        Success
    *   Positive: When we have a managed CPU and the sysfs got symlinked
    */
- -int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy,
- -              struct sys_device *sys_dev)
+ +static int cpufreq_add_dev_policy(unsigned int cpu,
+ +                                struct cpufreq_policy *policy,
+ +                                struct sys_device *sys_dev)
   {
         int ret = 0;
   #ifdef CONFIG_SMP
         unsigned long flags;
         unsigned int j;
- -
   #ifdef CONFIG_HOTPLUG_CPU
- -      if (per_cpu(cpufreq_cpu_governor, cpu)) {
- -              policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
+ +      struct cpufreq_governor *gov;
+ +
+ +      gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
+ +      if (gov) {
+ +              policy->governor = gov;
                 dprintk("Restoring governor %s for cpu %d\n",
                        policy->governor->name, cpu);
         }
@@@ -818,7 -799,7 +818,7 @@@
   
                         /* Set proper policy_cpu */
                         unlock_policy_rwsem_write(cpu);
-                       per_cpu(policy_cpu, cpu) = managed_policy->cpu;
+                       per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
   
                         if (lock_policy_rwsem_write(cpu) < 0) {
                                 /* Should not go through policy unlock path */
@@@ -859,8 -840,7 +859,8 @@@
   
   
   /* symlink affected CPUs */
- -int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy)
+ +static int cpufreq_add_dev_symlink(unsigned int cpu,
+ +                                 struct cpufreq_policy *policy)
   {
         unsigned int j;
         int ret = 0;
@@@ -887,9 -867,8 +887,9 @@@
         return ret;
   }
   
- -int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy,
- -              struct sys_device *sys_dev)
+ +static int cpufreq_add_dev_interface(unsigned int cpu,
+ +                                   struct cpufreq_policy *policy,
+ +                                   struct sys_device *sys_dev)
   {
         struct cpufreq_policy new_policy;
         struct freq_attr **drv_attr;
@@@ -921,18 -900,13 +921,18 @@@
                 if (ret)
                         goto err_out_kobj_put;
         }
+ +      if (cpufreq_driver->bios_limit) {
+ +              ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
+ +              if (ret)
+ +                      goto err_out_kobj_put;
+ +      }
   
         spin_lock_irqsave(&cpufreq_driver_lock, flags);
         for_each_cpu(j, policy->cpus) {
         if (!cpu_online(j))
                 continue;
                 per_cpu(cpufreq_cpu_data, j) = policy;
-               per_cpu(policy_cpu, j) = policy->cpu;
+               per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
         }
         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
   
@@@ -975,13 -949,10 +975,13 @@@ err_out_kobj_put
   static int cpufreq_add_dev(struct sys_device *sys_dev)
   {
         unsigned int cpu = sys_dev->id;
- -      int ret = 0;
+ +      int ret = 0, found = 0;
         struct cpufreq_policy *policy;
         unsigned long flags;
         unsigned int j;
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      int sibling;
+ +#endif
   
         if (cpu_is_offline(cpu))
                 return 0;
@@@ -1020,7 -991,7 +1020,7 @@@
         cpumask_copy(policy->cpus, cpumask_of(cpu));
   
         /* Initially set CPU itself as the policy_cpu */
-       per_cpu(policy_cpu, cpu) = cpu;
+       per_cpu(cpufreq_policy_cpu, cpu) = cpu;
         ret = (lock_policy_rwsem_write(cpu) < 0);
         WARN_ON(ret);
   
@@@ -1028,19 -999,7 +1028,19 @@@
         INIT_WORK(&policy->update, handle_update);
   
         /* Set governor before ->init, so that driver could check it */
- -      policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      for_each_online_cpu(sibling) {
+ +              struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
+ +              if (cp && cp->governor &&
+ +                  (cpumask_test_cpu(cpu, cp->related_cpus))) {
+ +                      policy->governor = cp->governor;
+ +                      found = 1;
+ +                      break;
+ +              }
+ +      }
+ +#endif
+ +      if (!found)
+ +              policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
         /* call driver. From then on the cpufreq must be able
          * to accept all calls to ->verify and ->setpolicy for this CPU
          */
@@@ -1152,8 -1111,7 +1152,8 @@@ static int __cpufreq_remove_dev(struct 
   #ifdef CONFIG_SMP
   
   #ifdef CONFIG_HOTPLUG_CPU
- -      per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
+ +      strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
+ +                      CPUFREQ_NAME_LEN);
   #endif
   
         /* if we have other CPUs still registered, we need to unlink them,
@@@ -1177,8 -1135,7 +1177,8 @@@
                                 continue;
                         dprintk("removing link for cpu %u\n", j);
   #ifdef CONFIG_HOTPLUG_CPU
- -                      per_cpu(cpufreq_cpu_governor, j) = data->governor;
+ +                      strncpy(per_cpu(cpufreq_cpu_governor, j),
+ +                              data->governor->name, CPUFREQ_NAME_LEN);
   #endif
                         cpu_sys_dev = get_cpu_sysdev(j);
                         sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
@@@ -1649,22 -1606,9 +1649,22 @@@ EXPORT_SYMBOL_GPL(cpufreq_register_gove
   
   void cpufreq_unregister_governor(struct cpufreq_governor *governor)
   {
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      int cpu;
+ +#endif
+ +
         if (!governor)
                 return;
   
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +      for_each_present_cpu(cpu) {
+ +              if (cpu_online(cpu))
+ +                      continue;
+ +              if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
+ +                      strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
+ +      }
+ +#endif
+ +
         mutex_lock(&cpufreq_governor_mutex);
         list_del(&governor->governor_list);
         mutex_unlock(&cpufreq_governor_mutex);
@@@ -2002,7 -1946,7 +2002,7 @@@ static int __init cpufreq_core_init(voi
         int cpu;
   
         for_each_possible_cpu(cpu) {
-               per_cpu(policy_cpu, cpu) = -1;
+               per_cpu(cpufreq_policy_cpu, cpu) = -1;
                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
         }
   
diff --combined drivers/crypto/padlock-aes.c

index 84c51e17726966c196ac53ce05671e972f65b98a,721d004a0235247d4b974adbfe8d90bdd4ea43f2..8c2f3703ec855f27a6863cb964c5d58a9d01dfa2
--- 1/drivers/crypto/padlock-aes.c
--- 2/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@@ -64,7 -64,7 +64,7 @@@ struct aes_ctx 
         u32 *D;
   };
   
- static DEFINE_PER_CPU(struct cword *, last_cword);
+ static DEFINE_PER_CPU(struct cword *, paes_last_cword);
   
   /* Tells whether the ACE is capable to generate
      the extended key for a given key_len. */
@@@ -152,9 -152,9 +152,9 @@@ static int aes_set_key(struct crypto_tf
   
   ok:
         for_each_online_cpu(cpu)
-               if (&ctx->cword.encrypt == per_cpu(last_cword, cpu) ||
-                   &ctx->cword.decrypt == per_cpu(last_cword, cpu))
-                       per_cpu(last_cword, cpu) = NULL;
+               if (&ctx->cword.encrypt == per_cpu(paes_last_cword, cpu) ||
+                   &ctx->cword.decrypt == per_cpu(paes_last_cword, cpu))
+                       per_cpu(paes_last_cword, cpu) = NULL;
   
         return 0;
   }
@@@ -166,7 -166,7 +166,7 @@@ static inline void padlock_reset_key(st
   {
         int cpu = raw_smp_processor_id();
   
-       if (cword != per_cpu(last_cword, cpu))
+       if (cword != per_cpu(paes_last_cword, cpu))
   #ifndef CONFIG_X86_64
                 asm volatile ("pushfl; popfl");
   #else
@@@ -176,7 -176,7 +176,7 @@@
   
   static inline void padlock_store_cword(struct cword *cword)
   {
-       per_cpu(last_cword, raw_smp_processor_id()) = cword;
+       per_cpu(paes_last_cword, raw_smp_processor_id()) = cword;
   }
   
   /*
@@@ -236,7 -236,7 +236,7 @@@ static inline void ecb_crypt(const u8 *
         /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
          * We could avoid some copying here but it's probably not worth it.
          */
- -      if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
+ +      if (unlikely(((unsigned long)in & ~PAGE_MASK) + ecb_fetch_bytes > PAGE_SIZE)) {
                 ecb_crypt_copy(in, out, key, cword, count);
                 return;
         }
@@@ -248,7 -248,7 +248,7 @@@ static inline u8 *cbc_crypt(const u8 *i
                             u8 *iv, struct cword *cword, int count)
   {
         /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
- -      if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
+ +      if (unlikely(((unsigned long)in & ~PAGE_MASK) + cbc_fetch_bytes > PAGE_SIZE))
                 return cbc_crypt_copy(in, out, key, iv, cword, count);
   
         return rep_xcrypt_cbc(in, out, key, iv, cword, count);
diff --combined drivers/dma/dmaengine.c

index 8f99354082ceaa169f7ac081594bc83b0c003478,51d7480d3a92d537abe018d44b830f2318bc5bdf..6f51a0a7a8bbdbca798f53293516e178ead5f4d2
--- 1/drivers/dma/dmaengine.c
--- 2/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@@ -326,14 -326,7 +326,7 @@@ arch_initcall(dma_channel_table_init)
    */
   struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
   {
-       struct dma_chan *chan;
-       int cpu;
- 
-       cpu = get_cpu();
-       chan = per_cpu_ptr(channel_table[tx_type], cpu)->chan;
-       put_cpu();
- 
-       return chan;
+       return this_cpu_read(channel_table[tx_type]->chan);
   }
   EXPORT_SYMBOL(dma_find_channel);
   
@@@ -632,21 -625,11 +625,21 @@@ static bool device_has_all_tx_types(str
         #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
         if (!dma_has_cap(DMA_XOR, device->cap_mask))
                 return false;
+ +
+ +      #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+ +      if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
+ +              return false;
+ +      #endif
         #endif
   
         #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
         if (!dma_has_cap(DMA_PQ, device->cap_mask))
                 return false;
+ +
+ +      #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+ +      if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
+ +              return false;
+ +      #endif
         #endif
   
         return true;
@@@ -857,7 -840,6 +850,6 @@@ dma_async_memcpy_buf_to_buf(struct dma_
         struct dma_async_tx_descriptor *tx;
         dma_addr_t dma_dest, dma_src;
         dma_cookie_t cookie;
-       int cpu;
         unsigned long flags;
   
         dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
@@@ -876,10 -858,10 +868,10 @@@
         tx->callback = NULL;
         cookie = tx->tx_submit(tx);
   
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
   
         return cookie;
   }
@@@ -906,7 -888,6 +898,6 @@@ dma_async_memcpy_buf_to_pg(struct dma_c
         struct dma_async_tx_descriptor *tx;
         dma_addr_t dma_dest, dma_src;
         dma_cookie_t cookie;
-       int cpu;
         unsigned long flags;
   
         dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
@@@ -923,10 -904,10 +914,10 @@@
         tx->callback = NULL;
         cookie = tx->tx_submit(tx);
   
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
   
         return cookie;
   }
@@@ -955,7 -936,6 +946,6 @@@ dma_async_memcpy_pg_to_pg(struct dma_ch
         struct dma_async_tx_descriptor *tx;
         dma_addr_t dma_dest, dma_src;
         dma_cookie_t cookie;
-       int cpu;
         unsigned long flags;
   
         dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
@@@ -973,10 -953,10 +963,10 @@@
         tx->callback = NULL;
         cookie = tx->tx_submit(tx);
   
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
   
         return cookie;
   }
diff --combined drivers/net/loopback.c

index eae4ad749e9d87956facbfadd53036d5d882ce03,8ebeb76a373d862c8d157017917990bf626e11dd..b9fcc9819837e7f79530ad11782272ea0f81a344
--- 1/drivers/net/loopback.c
--- 2/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@@ -81,7 -81,7 +81,7 @@@ static netdev_tx_t loopback_xmit(struc
   
         /* it's OK to use per_cpu_ptr() because BHs are off */
         pcpu_lstats = dev->ml_priv;
-       lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
+       lb_stats = this_cpu_ptr(pcpu_lstats);
   
         len = skb->len;
         if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
@@@ -207,12 -207,20 +207,12 @@@ static __net_init int loopback_net_init
   out_free_netdev:
         free_netdev(dev);
   out:
- -      if (net == &init_net)
+ +      if (net_eq(net, &init_net))
                 panic("loopback: Failed to register netdevice: %d\n", err);
         return err;
   }
   
- -static __net_exit void loopback_net_exit(struct net *net)
- -{
- -      struct net_device *dev = net->loopback_dev;
- -
- -      unregister_netdev(dev);
- -}
- -
   /* Registered in net/core/dev.c */
   struct pernet_operations __net_initdata loopback_net_ops = {
          .init = loopback_net_init,
- -       .exit = loopback_net_exit,
   };
diff --combined drivers/net/veth.c

index 63099c58a6ddd7a92f616ca196141becbd7ad463,0c4a811242574e9d6432e83db588ff9122f78a0c..3a15de56df9caa31b818139dbcd75e2dc99e5cc8
--- 1/drivers/net/veth.c
--- 2/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@@ -153,25 -153,34 +153,24 @@@ static netdev_tx_t veth_xmit(struct sk_
         struct net_device *rcv = NULL;
         struct veth_priv *priv, *rcv_priv;
         struct veth_net_stats *stats, *rcv_stats;
-       int length, cpu;
+       int length;
   
- -      skb_orphan(skb);
- -
         priv = netdev_priv(dev);
         rcv = priv->peer;
         rcv_priv = netdev_priv(rcv);
   
-       cpu = smp_processor_id();
-       stats = per_cpu_ptr(priv->stats, cpu);
-       rcv_stats = per_cpu_ptr(rcv_priv->stats, cpu);
+       stats = this_cpu_ptr(priv->stats);
+       rcv_stats = this_cpu_ptr(rcv_priv->stats);
   
         if (!(rcv->flags & IFF_UP))
                 goto tx_drop;
   
- -      if (skb->len > (rcv->mtu + MTU_PAD))
- -              goto rx_drop;
- -
- -        skb->tstamp.tv64 = 0;
- -      skb->pkt_type = PACKET_HOST;
- -      skb->protocol = eth_type_trans(skb, rcv);
         if (dev->features & NETIF_F_NO_CSUM)
                 skb->ip_summed = rcv_priv->ip_summed;
   
- -      skb->mark = 0;
- -      secpath_reset(skb);
- -      nf_reset(skb);
- -
- -      length = skb->len;
+ +      length = skb->len + ETH_HLEN;
+ +      if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
+ +              goto rx_drop;
   
         stats->tx_bytes += length;
         stats->tx_packets++;
@@@ -179,6 -188,7 +178,6 @@@
         rcv_stats->rx_bytes += length;
         rcv_stats->rx_packets++;
   
- -      netif_rx(skb);
         return NETDEV_TX_OK;
   
   tx_drop:
@@@ -199,29 -209,32 +198,29 @@@ rx_drop
   static struct net_device_stats *veth_get_stats(struct net_device *dev)
   {
         struct veth_priv *priv;
- -      struct net_device_stats *dev_stats;
         int cpu;
- -      struct veth_net_stats *stats;
+ +      struct veth_net_stats *stats, total = {0};
   
         priv = netdev_priv(dev);
- -      dev_stats = &dev->stats;
   
- -      dev_stats->rx_packets = 0;
- -      dev_stats->tx_packets = 0;
- -      dev_stats->rx_bytes = 0;
- -      dev_stats->tx_bytes = 0;
- -      dev_stats->tx_dropped = 0;
- -      dev_stats->rx_dropped = 0;
- -
- -      for_each_online_cpu(cpu) {
+ +      for_each_possible_cpu(cpu) {
                 stats = per_cpu_ptr(priv->stats, cpu);
   
- -              dev_stats->rx_packets += stats->rx_packets;
- -              dev_stats->tx_packets += stats->tx_packets;
- -              dev_stats->rx_bytes += stats->rx_bytes;
- -              dev_stats->tx_bytes += stats->tx_bytes;
- -              dev_stats->tx_dropped += stats->tx_dropped;
- -              dev_stats->rx_dropped += stats->rx_dropped;
+ +              total.rx_packets += stats->rx_packets;
+ +              total.tx_packets += stats->tx_packets;
+ +              total.rx_bytes   += stats->rx_bytes;
+ +              total.tx_bytes   += stats->tx_bytes;
+ +              total.tx_dropped += stats->tx_dropped;
+ +              total.rx_dropped += stats->rx_dropped;
         }
- -
- -      return dev_stats;
+ +      dev->stats.rx_packets = total.rx_packets;
+ +      dev->stats.tx_packets = total.tx_packets;
+ +      dev->stats.rx_bytes   = total.rx_bytes;
+ +      dev->stats.tx_bytes   = total.tx_bytes;
+ +      dev->stats.tx_dropped = total.tx_dropped;
+ +      dev->stats.rx_dropped = total.rx_dropped;
+ +
+ +      return &dev->stats;
   }
   
   static int veth_open(struct net_device *dev)
@@@ -326,7 -339,7 +325,7 @@@ static int veth_validate(struct nlattr 
   
   static struct rtnl_link_ops veth_link_ops;
   
- -static int veth_newlink(struct net_device *dev,
+ +static int veth_newlink(struct net *src_net, struct net_device *dev,
                          struct nlattr *tb[], struct nlattr *data[])
   {
         int err;
@@@ -334,7 -347,6 +333,7 @@@
         struct veth_priv *priv;
         char ifname[IFNAMSIZ];
         struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
+ +      struct net *net;
   
         /*
          * create and register peer first
@@@ -367,22 -379,14 +366,22 @@@
         else
                 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
   
- -      peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp);
- -      if (IS_ERR(peer))
+ +      net = rtnl_link_get_net(src_net, tbp);
+ +      if (IS_ERR(net))
+ +              return PTR_ERR(net);
+ +
+ +      peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp);
+ +      if (IS_ERR(peer)) {
+ +              put_net(net);
                 return PTR_ERR(peer);
+ +      }
   
         if (tbp[IFLA_ADDRESS] == NULL)
                 random_ether_addr(peer->dev_addr);
   
         err = register_netdevice(peer);
+ +      put_net(net);
+ +      net = NULL;
         if (err < 0)
                 goto err_register_peer;
   
@@@ -437,7 -441,7 +436,7 @@@ err_register_peer
         return err;
   }
   
- -static void veth_dellink(struct net_device *dev)
+ +static void veth_dellink(struct net_device *dev, struct list_head *head)
   {
         struct veth_priv *priv;
         struct net_device *peer;
@@@ -445,8 -449,8 +444,8 @@@
         priv = netdev_priv(dev);
         peer = priv->peer;
   
- -      unregister_netdevice(dev);
- -      unregister_netdevice(peer);
+ +      unregister_netdevice_queue(dev, head);
+ +      unregister_netdevice_queue(peer, head);
   }
   
   static const struct nla_policy veth_policy[VETH_INFO_MAX + 1];
diff --combined drivers/s390/net/netiucv.c

index 395c04c2b00fcd5cca85a696bccddf6f540b7064,14e61441ba0b7279740e8e1a5b6f156c10560565..98c04cac43c1d87467ee60f43807c0dbc654409b
--- 1/drivers/s390/net/netiucv.c
--- 2/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@@ -113,11 -113,9 +113,9 @@@ static inline int iucv_dbf_passes(debug
   #define IUCV_DBF_TEXT_(name, level, text...) \
         do { \
                 if (iucv_dbf_passes(iucv_dbf_##name, level)) { \
-                       char* iucv_dbf_txt_buf = \
-                                       get_cpu_var(iucv_dbf_txt_buf); \
-                       sprintf(iucv_dbf_txt_buf, text); \
-                       debug_text_event(iucv_dbf_##name, level, \
-                                               iucv_dbf_txt_buf); \
+                       char* __buf = get_cpu_var(iucv_dbf_txt_buf); \
+                       sprintf(__buf, text); \
+                       debug_text_event(iucv_dbf_##name, level, __buf); \
                         put_cpu_var(iucv_dbf_txt_buf); \
                 } \
         } while (0)
@@@ -741,13 -739,13 +739,13 @@@ static void conn_action_txdone(fsm_inst
         if (single_flag) {
                 if ((skb = skb_dequeue(&conn->commit_queue))) {
                         atomic_dec(&skb->users);
- -                      dev_kfree_skb_any(skb);
                         if (privptr) {
                                 privptr->stats.tx_packets++;
                                 privptr->stats.tx_bytes +=
                                         (skb->len - NETIUCV_HDRLEN
- -                                                - NETIUCV_HDRLEN);
+ +                                                - NETIUCV_HDRLEN);
                         }
+ +                      dev_kfree_skb_any(skb);
                 }
         }
         conn->tx_buff->data = conn->tx_buff->head;
diff --combined fs/ext4/mballoc.c

index c1e19d5b5985f2a36d39801e697b9b45f7283f50,d527fd384582e75f872ad40ba001353bdef3aab3..b1fd3daadc9c50522894fd00ec62bb8e600f9fd0
--- 1/fs/ext4/mballoc.c
--- 2/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@@ -142,7 -142,7 +142,7 @@@
    * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
    * value of s_mb_order2_reqs can be tuned via
    * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
- - * stripe size (sbi->s_stripe), we try to search for contigous block in
+ + * stripe size (sbi->s_stripe), we try to search for contiguous block in
    * stripe size. This should result in better allocation on RAID setups. If
    * not, we search in the specific group using bitmap for best extents. The
    * tunable min_to_scan and max_to_scan control the behaviour here.
@@@ -2529,6 -2529,7 +2529,6 @@@ static void release_blocks_on_commit(jo
         struct ext4_group_info *db;
         int err, count = 0, count2 = 0;
         struct ext4_free_data *entry;
- -      ext4_fsblk_t discard_block;
         struct list_head *l, *ltmp;
   
         list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@@ -2558,19 -2559,13 +2558,19 @@@
                         page_cache_release(e4b.bd_bitmap_page);
                 }
                 ext4_unlock_group(sb, entry->group);
- -              discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
- -                      + entry->start_blk
- -                      + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- -              trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
- -                                        entry->count);
- -              sb_issue_discard(sb, discard_block, entry->count);
- -
+ +              if (test_opt(sb, DISCARD)) {
+ +                      ext4_fsblk_t discard_block;
+ +                      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ +
+ +                      discard_block = (ext4_fsblk_t)entry->group *
+ +                                              EXT4_BLOCKS_PER_GROUP(sb)
+ +                                      + entry->start_blk
+ +                                      + le32_to_cpu(es->s_first_data_block);
+ +                      trace_ext4_discard_blocks(sb,
+ +                                      (unsigned long long)discard_block,
+ +                                      entry->count);
+ +                      sb_issue_discard(sb, discard_block, entry->count);
+ +              }
                 kmem_cache_free(ext4_free_ext_cachep, entry);
                 ext4_mb_release_desc(&e4b);
         }
@@@ -3010,24 -3005,6 +3010,24 @@@ static void ext4_mb_collect_stats(struc
                 trace_ext4_mballoc_prealloc(ac);
   }
   
+ +/*
+ + * Called on failure; free up any blocks from the inode PA for this
+ + * context.  We don't need this for MB_GROUP_PA because we only change
+ + * pa_free in ext4_mb_release_context(), but on failure, we've already
+ + * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
+ + */
+ +static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
+ +{
+ +      struct ext4_prealloc_space *pa = ac->ac_pa;
+ +      int len;
+ +
+ +      if (pa && pa->pa_type == MB_INODE_PA) {
+ +              len = ac->ac_b_ex.fe_len;
+ +              pa->pa_free += len;
+ +      }
+ +
+ +}
+ +
   /*
    * use blocks preallocated to inode
    */
@@@ -3955,7 -3932,7 +3955,7 @@@ static void ext4_mb_group_or_file(struc
          * per cpu locality group is to reduce the contention between block
          * request from multiple CPUs.
          */
-       ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
+       ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
   
         /* we're going to use group allocation */
         ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@@ -4313,7 -4290,6 +4313,7 @@@ repeat
                         ac->ac_status = AC_STATUS_CONTINUE;
                         goto repeat;
                 } else if (*errp) {
+ +                      ext4_discard_allocated_blocks(ac);
                         ac->ac_b_ex.fe_len = 0;
                         ar->len = 0;
                         ext4_mb_show_ac(ac);
@@@ -4446,24 -4422,18 +4446,24 @@@ ext4_mb_free_metadata(handle_t *handle
         return 0;
   }
   
- -/*
- - * Main entry point into mballoc to free blocks
+ +/**
+ + * ext4_free_blocks() -- Free given blocks and update quota
+ + * @handle:           handle for this transaction
+ + * @inode:            inode
+ + * @block:            start physical block to free
+ + * @count:            number of blocks to count
+ + * @metadata:                 Are these metadata blocks
    */
- -void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
- -                      ext4_fsblk_t block, unsigned long count,
- -                      int metadata, unsigned long *freed)
+ +void ext4_free_blocks(handle_t *handle, struct inode *inode,
+ +                    struct buffer_head *bh, ext4_fsblk_t block,
+ +                    unsigned long count, int flags)
   {
         struct buffer_head *bitmap_bh = NULL;
         struct super_block *sb = inode->i_sb;
         struct ext4_allocation_context *ac = NULL;
         struct ext4_group_desc *gdp;
         struct ext4_super_block *es;
+ +      unsigned long freed = 0;
         unsigned int overflow;
         ext4_grpblk_t bit;
         struct buffer_head *gd_bh;
@@@ -4473,16 -4443,13 +4473,16 @@@
         int err = 0;
         int ret;
   
- -      *freed = 0;
+ +      if (bh) {
+ +              if (block)
+ +                      BUG_ON(block != bh->b_blocknr);
+ +              else
+ +                      block = bh->b_blocknr;
+ +      }
   
         sbi = EXT4_SB(sb);
         es = EXT4_SB(sb)->s_es;
- -      if (block < le32_to_cpu(es->s_first_data_block) ||
- -          block + count < block ||
- -          block + count > ext4_blocks_count(es)) {
+ +      if (!ext4_data_block_valid(sbi, block, count)) {
                 ext4_error(sb, __func__,
                             "Freeing blocks not in datazone - "
                             "block = %llu, count = %lu", block, count);
@@@ -4490,32 -4457,7 +4490,32 @@@
         }
   
         ext4_debug("freeing block %llu\n", block);
- -      trace_ext4_free_blocks(inode, block, count, metadata);
+ +      trace_ext4_free_blocks(inode, block, count, flags);
+ +
+ +      if (flags & EXT4_FREE_BLOCKS_FORGET) {
+ +              struct buffer_head *tbh = bh;
+ +              int i;
+ +
+ +              BUG_ON(bh && (count > 1));
+ +
+ +              for (i = 0; i < count; i++) {
+ +                      if (!bh)
+ +                              tbh = sb_find_get_block(inode->i_sb,
+ +                                                      block + i);
+ +                      ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+ +                                  inode, tbh, block + i);
+ +              }
+ +      }
+ +
+ +      /* 
+ +       * We need to make sure we don't reuse the freed block until
+ +       * after the transaction is committed, which we can do by
+ +       * treating the block as metadata, below.  We make an
+ +       * exception if the inode is to be written in writeback mode
+ +       * since writeback mode has weak data consistency guarantees.
+ +       */
+ +      if (!ext4_should_writeback_data(inode))
+ +              flags |= EXT4_FREE_BLOCKS_METADATA;
   
         ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
         if (ac) {
@@@ -4591,8 -4533,7 +4591,8 @@@ do_more
         err = ext4_mb_load_buddy(sb, block_group, &e4b);
         if (err)
                 goto error_return;
- -      if (metadata && ext4_handle_valid(handle)) {
+ +
+ +      if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
                 struct ext4_free_data *new_entry;
                 /*
                  * blocks being freed are metadata. these blocks shouldn't
@@@ -4631,7 -4572,7 +4631,7 @@@
   
         ext4_mb_release_desc(&e4b);
   
- -      *freed += count;
+ +      freed += count;
   
         /* We dirtied the bitmap block */
         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@@ -4651,8 -4592,6 +4651,8 @@@
         }
         sb->s_dirt = 1;
   error_return:
+ +      if (freed)
+ +              vfs_dq_free_block(inode, freed);
         brelse(bitmap_bh);
         ext4_std_error(sb, err);
         if (ac)
diff --combined fs/xfs/xfs_mount.c

index 66a888a9ad6f87a0f913bc6b1508d0f8897927fb,ccafe8ef7ad5cab12f78c7fce06b5dfbacb6cb47..bfffd6334abbfae37d804174241bc1a1c17760d3
--- 1/fs/xfs/xfs_mount.c
--- 2/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@@ -583,8 -583,8 +583,8 @@@ xfs_readsb(xfs_mount_t *mp, int flags
         sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
         extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
   
- -      bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
- -                              BTOBB(sector_size), extra_flags);
+ +      bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
+ +                        extra_flags);
         if (!bp || XFS_BUF_ISERROR(bp)) {
                 xfs_fs_mount_cmn_err(flags, "SB read failed");
                 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@@ -624,8 -624,8 +624,8 @@@
                 XFS_BUF_UNMANAGE(bp);
                 xfs_buf_relse(bp);
                 sector_size = mp->m_sb.sb_sectsize;
- -              bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
- -                                      BTOBB(sector_size), extra_flags);
+ +              bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
+ +                                BTOBB(sector_size), extra_flags);
                 if (!bp || XFS_BUF_ISERROR(bp)) {
                         xfs_fs_mount_cmn_err(flags, "SB re-read failed");
                         error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@@ -1471,7 -1471,7 +1471,7 @@@ xfs_log_sbcount
         if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                 return 0;
   
- -      tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
+ +      tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
         error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
                                         XFS_DEFAULT_LOG_COUNT);
         if (error) {
@@@ -2123,7 -2123,7 +2123,7 @@@ xfs_icsb_destroy_counters
         mutex_destroy(&mp->m_icsb_mutex);
   }
   
- -STATIC_INLINE void
+ +STATIC void
   xfs_icsb_lock_cntr(
         xfs_icsb_cnts_t *icsbp)
   {
@@@ -2132,7 -2132,7 +2132,7 @@@
         }
   }
   
- -STATIC_INLINE void
+ +STATIC void
   xfs_icsb_unlock_cntr(
         xfs_icsb_cnts_t *icsbp)
   {
@@@ -2140,7 -2140,7 +2140,7 @@@
   }
   
   
- -STATIC_INLINE void
+ +STATIC void
   xfs_icsb_lock_all_counters(
         xfs_mount_t     *mp)
   {
@@@ -2153,7 -2153,7 +2153,7 @@@
         }
   }
   
- -STATIC_INLINE void
+ +STATIC void
   xfs_icsb_unlock_all_counters(
         xfs_mount_t     *mp)
   {
@@@ -2389,12 -2389,12 +2389,12 @@@ xfs_icsb_modify_counters
   {
         xfs_icsb_cnts_t *icsbp;
         long long       lcounter;       /* long counter for 64 bit fields */
-       int             cpu, ret = 0;
+       int             ret = 0;
   
         might_sleep();
   again:
-       cpu = get_cpu();
-       icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
+       preempt_disable();
+       icsbp = this_cpu_ptr(mp->m_sb_cnts);
   
         /*
          * if the counter is disabled, go to slow path
@@@ -2438,11 -2438,11 +2438,11 @@@
                 break;
         }
         xfs_icsb_unlock_cntr(icsbp);
-       put_cpu();
+       preempt_enable();
         return 0;
   
   slow_path:
-       put_cpu();
+       preempt_enable();
   
         /*
          * serialise with a mutex so we don't burn lots of cpu on
@@@ -2490,7 -2490,7 +2490,7 @@@
   
   balance_counter:
         xfs_icsb_unlock_cntr(icsbp);
-       put_cpu();
+       preempt_enable();
   
         /*
          * We may have multiple threads here if multiple per-cpu
diff --combined include/net/neighbour.h

index 0302f31a2fb7e53956f089efac74848fbd15466d,f28403ff7648e286c85d131d1a848f01f1d35489..b0173202cad96f3a8aa55df85d16b2d11a62b608
--- 1/include/net/neighbour.h
--- 2/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@@ -37,7 -37,8 +37,7 @@@
   
   struct neighbour;
   
- -struct neigh_parms
- -{
+ +struct neigh_parms {
   #ifdef CONFIG_NET_NS
         struct net *net;
   #endif
@@@ -69,7 -70,8 +69,7 @@@
         int     locktime;
   };
   
- -struct neigh_statistics
- -{
+ +struct neigh_statistics {
         unsigned long allocs;           /* number of allocated neighs */
         unsigned long destroys;         /* number of destroyed neighs */
         unsigned long hash_grows;       /* number of hash resizes */
@@@ -88,14 -90,10 +88,9 @@@
         unsigned long unres_discards;   /* number of unresolved drops */
   };
   
- #define NEIGH_CACHE_STAT_INC(tbl, field)                              \
-       do {                                                            \
-               preempt_disable();                                      \
-               (per_cpu_ptr((tbl)->stats, smp_processor_id())->field)++; \
-               preempt_enable();                                       \
-       } while (0)
+ #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
   
- -struct neighbour
- -{
+ +struct neighbour {
         struct neighbour        *next;
         struct neigh_table      *tbl;
         struct neigh_parms      *parms;
@@@ -119,7 -117,8 +114,7 @@@
         u8                      primary_key[0];
   };
   
- -struct neigh_ops
- -{
+ +struct neigh_ops {
         int                     family;
         void                    (*solicit)(struct neighbour *, struct sk_buff*);
         void                    (*error_report)(struct neighbour *, struct sk_buff*);
@@@ -129,7 -128,8 +124,7 @@@
         int                     (*queue_xmit)(struct sk_buff*);
   };
   
- -struct pneigh_entry
- -{
+ +struct pneigh_entry {
         struct pneigh_entry     *next;
   #ifdef CONFIG_NET_NS
         struct net              *net;
@@@ -144,7 -144,8 +139,7 @@@
    */
   
   
- -struct neigh_table
- -{
+ +struct neigh_table {
         struct neigh_table      *next;
         int                     family;
         int                     entry_size;
@@@ -258,7 -259,8 +253,7 @@@ extern int                 neigh_sysctl_register(stru
                                                       struct neigh_parms *p,
                                                       int p_id, int pdev_id,
                                                       char *p_name,
- -                                                    proc_handler *proc_handler,
- -                                                    ctl_handler *strategy);
+ +                                                    proc_handler *proc_handler);
   extern void                   neigh_sysctl_unregister(struct neigh_parms *p);
   
   static inline void __neigh_parms_put(struct neigh_parms *parms)
diff --combined include/net/netfilter/nf_conntrack.h

index 5cf7270e3ffc3e94c6d80bd79168056e9c1e539f,dde549779e422d113bd7e2025e9c391642326a87..a0904adfb8f7aba343152a5af47aa7e818c3b39d
--- 1/include/net/netfilter/nf_conntrack.h
--- 2/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@@ -255,9 -255,11 +255,9 @@@ static inline bool nf_ct_kill(struct nf
   }
   
   /* These are for NAT.  Icky. */
- -/* Update TCP window tracking data when NAT mangles the packet */
- -extern void nf_conntrack_tcp_update(const struct sk_buff *skb,
- -                                  unsigned int dataoff,
- -                                  struct nf_conn *ct, int dir,
- -                                  s16 offset);
+ +extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
+ +                             enum ip_conntrack_dir dir,
+ +                             u32 seq);
   
   /* Fake conntrack entry for untracked connections */
   extern struct nf_conn nf_conntrack_untracked;
@@@ -293,11 -295,11 +293,11 @@@ extern unsigned int nf_conntrack_htable
   extern unsigned int nf_conntrack_max;
   
   #define NF_CT_STAT_INC(net, count)    \
-       (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+       __this_cpu_inc((net)->ct.stat->count)
   #define NF_CT_STAT_INC_ATOMIC(net, count)             \
   do {                                                  \
         local_bh_disable();                             \
-       per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++;   \
+       __this_cpu_inc((net)->ct.stat->count);          \
         local_bh_enable();                              \
   } while (0)
   
diff --combined kernel/lockdep.c

index 4f8df01dbe51ad05e1957fa7e1eecd20af7e0765,8631320a50d0fea1969968743f4a5c5d6b135121..429540c70d3f497ae7fc58998b0f248817a2cbf9
--- 1/kernel/lockdep.c
--- 2/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@@ -49,7 -49,7 +49,7 @@@
   #include "lockdep_internals.h"
   
   #define CREATE_TRACE_POINTS
- -#include <trace/events/lockdep.h>
+ +#include <trace/events/lock.h>
   
   #ifdef CONFIG_PROVE_LOCKING
   int prove_locking = 1;
@@@ -140,13 -140,9 +140,14 @@@ static inline struct lock_class *hlock_
   }
   
   #ifdef CONFIG_LOCK_STAT
- static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
+                     cpu_lock_stats);
   
+ +static inline u64 lockstat_clock(void)
+ +{
+ +      return cpu_clock(smp_processor_id());
+ +}
+ +
   static int lock_point(unsigned long points[], unsigned long ip)
   {
         int i;
@@@ -163,12 -159,12 +164,12 @@@
         return i;
   }
   
- -static void lock_time_inc(struct lock_time *lt, s64 time)
+ +static void lock_time_inc(struct lock_time *lt, u64 time)
   {
         if (time > lt->max)
                 lt->max = time;
   
- -      if (time < lt->min || !lt->min)
+ +      if (time < lt->min || !lt->nr)
                 lt->min = time;
   
         lt->total += time;
@@@ -177,15 -173,8 +178,15 @@@
   
   static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
   {
- -      dst->min += src->min;
- -      dst->max += src->max;
+ +      if (!src->nr)
+ +              return;
+ +
+ +      if (src->max > dst->max)
+ +              dst->max = src->max;
+ +
+ +      if (src->min < dst->min || !dst->nr)
+ +              dst->min = src->min;
+ +
         dst->total += src->total;
         dst->nr += src->nr;
   }
@@@ -198,7 -187,7 +199,7 @@@ struct lock_class_stats lock_stats(stru
         memset(&stats, 0, sizeof(struct lock_class_stats));
         for_each_possible_cpu(cpu) {
                 struct lock_class_stats *pcs =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
   
                 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                         stats.contention_point[i] += pcs->contention_point[i];
@@@ -225,7 -214,7 +226,7 @@@ void clear_lock_stats(struct lock_clas
   
         for_each_possible_cpu(cpu) {
                 struct lock_class_stats *cpu_stats =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
   
                 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
         }
@@@ -235,23 -224,23 +236,23 @@@
   
   static struct lock_class_stats *get_lock_stats(struct lock_class *class)
   {
-       return &get_cpu_var(lock_stats)[class - lock_classes];
+       return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
   }
   
   static void put_lock_stats(struct lock_class_stats *stats)
   {
-       put_cpu_var(lock_stats);
+       put_cpu_var(cpu_lock_stats);
   }
   
   static void lock_release_holdtime(struct held_lock *hlock)
   {
         struct lock_class_stats *stats;
- -      s64 holdtime;
+ +      u64 holdtime;
   
         if (!lock_stat)
                 return;
   
- -      holdtime = sched_clock() - hlock->holdtime_stamp;
+ +      holdtime = lockstat_clock() - hlock->holdtime_stamp;
   
         stats = get_lock_stats(hlock_class(hlock));
         if (hlock->read)
@@@ -386,8 -375,7 +387,8 @@@ static int save_trace(struct stack_trac
          * complete trace that maxes out the entries provided will be reported
          * as incomplete, friggin useless </rant>
          */
- -      if (trace->entries[trace->nr_entries-1] == ULONG_MAX)
+ +      if (trace->nr_entries != 0 &&
+ +          trace->entries[trace->nr_entries-1] == ULONG_MAX)
                 trace->nr_entries--;
   
         trace->max_entries = trace->nr_entries;
@@@ -2805,7 -2793,7 +2806,7 @@@ static int __lock_acquire(struct lockde
         hlock->references = references;
   #ifdef CONFIG_LOCK_STAT
         hlock->waittime_stamp = 0;
- -      hlock->holdtime_stamp = sched_clock();
+ +      hlock->holdtime_stamp = lockstat_clock();
   #endif
   
         if (check == 2 && !mark_irqflags(curr, hlock))
@@@ -3335,7 -3323,7 +3336,7 @@@ found_it
         if (hlock->instance != lock)
                 return;
   
- -      hlock->waittime_stamp = sched_clock();
+ +      hlock->waittime_stamp = lockstat_clock();
   
         contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
         contending_point = lock_point(hlock_class(hlock)->contending_point,
@@@ -3358,7 -3346,8 +3359,7 @@@ __lock_acquired(struct lockdep_map *loc
         struct held_lock *hlock, *prev_hlock;
         struct lock_class_stats *stats;
         unsigned int depth;
- -      u64 now;
- -      s64 waittime = 0;
+ +      u64 now, waittime = 0;
         int i, cpu;
   
         depth = curr->lockdep_depth;
@@@ -3386,7 -3375,7 +3387,7 @@@ found_it
   
         cpu = smp_processor_id();
         if (hlock->waittime_stamp) {
- -              now = sched_clock();
+ +              now = lockstat_clock();
                 waittime = now - hlock->waittime_stamp;
                 hlock->holdtime_stamp = now;
         }
diff --combined kernel/module.c

index 5842a71cf0527163c960cc8c41d549a5033e3443,64787cddeb5ee34c0ffb891e711e89d4f5b71b79..12afc5a3ddd3a73972604050695ff4ab4ef708f6
--- 1/kernel/module.c
--- 2/kernel/module.c
+++ b/kernel/module.c
@@@ -370,8 -370,6 +370,6 @@@ EXPORT_SYMBOL_GPL(find_module)
   
   #ifdef CONFIG_SMP
   
- #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
- 
   static void *percpu_modalloc(unsigned long size, unsigned long align,
                              const char *name)
   {
@@@ -395,154 -393,6 +393,6 @@@ static void percpu_modfree(void *freeme
         free_percpu(freeme);
   }
   
- #else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
- 
- /* Number of blocks used and allocated. */
- static unsigned int pcpu_num_used, pcpu_num_allocated;
- /* Size of each block.  -ve means used. */
- static int *pcpu_size;
- 
- static int split_block(unsigned int i, unsigned short size)
- {
-       /* Reallocation required? */
-       if (pcpu_num_used + 1 > pcpu_num_allocated) {
-               int *new;
- 
-               new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
-                              GFP_KERNEL);
-               if (!new)
-                       return 0;
- 
-               pcpu_num_allocated *= 2;
-               pcpu_size = new;
-       }
- 
-       /* Insert a new subblock */
-       memmove(&pcpu_size[i+1], &pcpu_size[i],
-               sizeof(pcpu_size[0]) * (pcpu_num_used - i));
-       pcpu_num_used++;
- 
-       pcpu_size[i+1] -= size;
-       pcpu_size[i] = size;
-       return 1;
- }
- 
- static inline unsigned int block_size(int val)
- {
-       if (val < 0)
-               return -val;
-       return val;
- }
- 
- static void *percpu_modalloc(unsigned long size, unsigned long align,
-                            const char *name)
- {
-       unsigned long extra;
-       unsigned int i;
-       void *ptr;
-       int cpu;
- 
-       if (align > PAGE_SIZE) {
-               printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
-                      name, align, PAGE_SIZE);
-               align = PAGE_SIZE;
-       }
- 
-       ptr = __per_cpu_start;
-       for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-               /* Extra for alignment requirement. */
-               extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
-               BUG_ON(i == 0 && extra != 0);
- 
-               if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
-                       continue;
- 
-               /* Transfer extra to previous block. */
-               if (pcpu_size[i-1] < 0)
-                       pcpu_size[i-1] -= extra;
-               else
-                       pcpu_size[i-1] += extra;
-               pcpu_size[i] -= extra;
-               ptr += extra;
- 
-               /* Split block if warranted */
-               if (pcpu_size[i] - size > sizeof(unsigned long))
-                       if (!split_block(i, size))
-                               return NULL;
- 
-               /* add the per-cpu scanning areas */
-               for_each_possible_cpu(cpu)
-                       kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
-                                      GFP_KERNEL);
- 
-               /* Mark allocated */
-               pcpu_size[i] = -pcpu_size[i];
-               return ptr;
-       }
- 
-       printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-              size);
-       return NULL;
- }
- 
- static void percpu_modfree(void *freeme)
- {
-       unsigned int i;
-       void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
-       int cpu;
- 
-       /* First entry is core kernel percpu data. */
-       for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-               if (ptr == freeme) {
-                       pcpu_size[i] = -pcpu_size[i];
-                       goto free;
-               }
-       }
-       BUG();
- 
-  free:
-       /* remove the per-cpu scanning areas */
-       for_each_possible_cpu(cpu)
-               kmemleak_free(freeme + per_cpu_offset(cpu));
- 
-       /* Merge with previous? */
-       if (pcpu_size[i-1] >= 0) {
-               pcpu_size[i-1] += pcpu_size[i];
-               pcpu_num_used--;
-               memmove(&pcpu_size[i], &pcpu_size[i+1],
-                       (pcpu_num_used - i) * sizeof(pcpu_size[0]));
-               i--;
-       }
-       /* Merge with next? */
-       if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
-               pcpu_size[i] += pcpu_size[i+1];
-               pcpu_num_used--;
-               memmove(&pcpu_size[i+1], &pcpu_size[i+2],
-                       (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
-       }
- }
- 
- static int percpu_modinit(void)
- {
-       pcpu_num_used = 2;
-       pcpu_num_allocated = 2;
-       pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
-                           GFP_KERNEL);
-       /* Static in-kernel percpu data (used). */
-       pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
-       /* Free room. */
-       pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
-       if (pcpu_size[1] < 0) {
-               printk(KERN_ERR "No per-cpu room for modules.\n");
-               pcpu_num_used = 1;
-       }
- 
-       return 0;
- }
- __initcall(percpu_modinit);
- 
- #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
- 
   static unsigned int find_pcpusec(Elf_Ehdr *hdr,
                                  Elf_Shdr *sechdrs,
                                  const char *secstrings)
@@@ -1187,8 -1037,7 +1037,8 @@@ static void add_sect_attrs(struct modul
   
         /* Count loaded sections and allocate structures */
         for (i = 0; i < nsect; i++)
- -              if (sechdrs[i].sh_flags & SHF_ALLOC)
+ +              if (sechdrs[i].sh_flags & SHF_ALLOC
+ +                  && sechdrs[i].sh_size)
                         nloaded++;
         size[0] = ALIGN(sizeof(*sect_attrs)
                         + nloaded * sizeof(sect_attrs->attrs[0]),
@@@ -1208,8 -1057,6 +1058,8 @@@
         for (i = 0; i < nsect; i++) {
                 if (! (sechdrs[i].sh_flags & SHF_ALLOC))
                         continue;
+ +              if (!sechdrs[i].sh_size)
+ +                      continue;
                 sattr->address = sechdrs[i].sh_addr;
                 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
                                         GFP_KERNEL);
diff --combined kernel/rcutorture.c

index a621a67ef4e3bd5e4aa97522cdff8b443db15e32,178967b6434e8e27061ac7b226b56c669de7e607..9bb52177af02a3e20aa347e3b65c0a236caa1922
--- 1/kernel/rcutorture.c
--- 2/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@@ -327,11 -327,6 +327,11 @@@ rcu_torture_cb(struct rcu_head *p
                 cur_ops->deferred_free(rp);
   }
   
+ +static int rcu_no_completed(void)
+ +{
+ +      return 0;
+ +}
+ +
   static void rcu_torture_deferred_free(struct rcu_torture *p)
   {
         call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@@ -393,21 -388,6 +393,21 @@@ static struct rcu_torture_ops rcu_sync_
         .name           = "rcu_sync"
   };
   
+ +static struct rcu_torture_ops rcu_expedited_ops = {
+ +      .init           = rcu_sync_torture_init,
+ +      .cleanup        = NULL,
+ +      .readlock       = rcu_torture_read_lock,
+ +      .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+ +      .readunlock     = rcu_torture_read_unlock,
+ +      .completed      = rcu_no_completed,
+ +      .deferred_free  = rcu_sync_torture_deferred_free,
+ +      .sync           = synchronize_rcu_expedited,
+ +      .cb_barrier     = NULL,
+ +      .stats          = NULL,
+ +      .irq_capable    = 1,
+ +      .name           = "rcu_expedited"
+ +};
+ +
   /*
    * Definitions for rcu_bh torture testing.
    */
@@@ -567,25 -547,6 +567,25 @@@ static struct rcu_torture_ops srcu_ops 
         .name           = "srcu"
   };
   
+ +static void srcu_torture_synchronize_expedited(void)
+ +{
+ +      synchronize_srcu_expedited(&srcu_ctl);
+ +}
+ +
+ +static struct rcu_torture_ops srcu_expedited_ops = {
+ +      .init           = srcu_torture_init,
+ +      .cleanup        = srcu_torture_cleanup,
+ +      .readlock       = srcu_torture_read_lock,
+ +      .read_delay     = srcu_read_delay,
+ +      .readunlock     = srcu_torture_read_unlock,
+ +      .completed      = srcu_torture_completed,
+ +      .deferred_free  = rcu_sync_torture_deferred_free,
+ +      .sync           = srcu_torture_synchronize_expedited,
+ +      .cb_barrier     = NULL,
+ +      .stats          = srcu_torture_stats,
+ +      .name           = "srcu_expedited"
+ +};
+ +
   /*
    * Definitions for sched torture testing.
    */
@@@ -601,6 -562,11 +601,6 @@@ static void sched_torture_read_unlock(i
         preempt_enable();
   }
   
- -static int sched_torture_completed(void)
- -{
- -      return 0;
- -}
- -
   static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
   {
         call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@@ -617,7 -583,7 +617,7 @@@ static struct rcu_torture_ops sched_op
         .readlock       = sched_torture_read_lock,
         .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
         .readunlock     = sched_torture_read_unlock,
- -      .completed      = sched_torture_completed,
+ +      .completed      = rcu_no_completed,
         .deferred_free  = rcu_sched_torture_deferred_free,
         .sync           = sched_torture_synchronize,
         .cb_barrier     = rcu_barrier_sched,
@@@ -626,13 -592,13 +626,13 @@@
         .name           = "sched"
   };
   
- -static struct rcu_torture_ops sched_ops_sync = {
+ +static struct rcu_torture_ops sched_sync_ops = {
         .init           = rcu_sync_torture_init,
         .cleanup        = NULL,
         .readlock       = sched_torture_read_lock,
         .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
         .readunlock     = sched_torture_read_unlock,
- -      .completed      = sched_torture_completed,
+ +      .completed      = rcu_no_completed,
         .deferred_free  = rcu_sync_torture_deferred_free,
         .sync           = sched_torture_synchronize,
         .cb_barrier     = NULL,
@@@ -640,13 -606,15 +640,13 @@@
         .name           = "sched_sync"
   };
   
- -extern int rcu_expedited_torture_stats(char *page);
- -
   static struct rcu_torture_ops sched_expedited_ops = {
         .init           = rcu_sync_torture_init,
         .cleanup        = NULL,
         .readlock       = sched_torture_read_lock,
         .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
         .readunlock     = sched_torture_read_unlock,
- -      .completed      = sched_torture_completed,
+ +      .completed      = rcu_no_completed,
         .deferred_free  = rcu_sync_torture_deferred_free,
         .sync           = synchronize_sched_expedited,
         .cb_barrier     = NULL,
@@@ -682,7 -650,7 +682,7 @@@ rcu_torture_writer(void *arg
                 old_rp = rcu_torture_current;
                 rp->rtort_mbtest = 1;
                 rcu_assign_pointer(rcu_torture_current, rp);
- -              smp_wmb();
+ +              smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
                 if (old_rp) {
                         i = old_rp->rtort_pipe_count;
                         if (i > RCU_TORTURE_PIPE_LEN)
@@@ -763,13 -731,13 +763,13 @@@ static void rcu_torture_timer(unsigned 
                 /* Should not happen, but... */
                 pipe_count = RCU_TORTURE_PIPE_LEN;
         }
-       ++__get_cpu_var(rcu_torture_count)[pipe_count];
+       __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
         completed = cur_ops->completed() - completed;
         if (completed > RCU_TORTURE_PIPE_LEN) {
                 /* Should not happen, but... */
                 completed = RCU_TORTURE_PIPE_LEN;
         }
-       ++__get_cpu_var(rcu_torture_batch)[completed];
+       __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
         preempt_enable();
         cur_ops->readunlock(idx);
   }
@@@ -818,13 -786,13 +818,13 @@@ rcu_torture_reader(void *arg
                         /* Should not happen, but... */
                         pipe_count = RCU_TORTURE_PIPE_LEN;
                 }
-               ++__get_cpu_var(rcu_torture_count)[pipe_count];
+               __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
                 completed = cur_ops->completed() - completed;
                 if (completed > RCU_TORTURE_PIPE_LEN) {
                         /* Should not happen, but... */
                         completed = RCU_TORTURE_PIPE_LEN;
                 }
-               ++__get_cpu_var(rcu_torture_batch)[completed];
+               __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
                 preempt_enable();
                 cur_ops->readunlock(idx);
                 schedule();
@@@ -1131,10 -1099,9 +1131,10 @@@ rcu_torture_init(void
         int cpu;
         int firsterr = 0;
         static struct rcu_torture_ops *torture_ops[] =
- -              { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
- -                &sched_expedited_ops,
- -                &srcu_ops, &sched_ops, &sched_ops_sync, };
+ +              { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
+ +                &rcu_bh_ops, &rcu_bh_sync_ops,
+ +                &srcu_ops, &srcu_expedited_ops,
+ +                &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
   
         mutex_lock(&fullstop_mutex);
   
@@@ -1145,12 -1112,8 +1145,12 @@@
                         break;
         }
         if (i == ARRAY_SIZE(torture_ops)) {
- -              printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
+ +              printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
                        torture_type);
+ +              printk(KERN_ALERT "rcu-torture types:");
+ +              for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
+ +                      printk(KERN_ALERT " %s", torture_ops[i]->name);
+ +              printk(KERN_ALERT "\n");
                 mutex_unlock(&fullstop_mutex);
                 return -EINVAL;
         }
diff --combined kernel/sched.c

index ff39cadf621e91834dcdc9116d123f8912eab225,eecf070ffd1a1deeef02a1588b38c5cbcfe35968..fd05861b2111005a5a88b386d7d77ee036f5e160
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -298,7 -298,7 +298,7 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
   
   #ifdef CONFIG_RT_GROUP_SCHED
   static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
- static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
   #endif /* CONFIG_RT_GROUP_SCHED */
   #else /* !CONFIG_USER_SCHED */
   #define root_task_group init_task_group
@@@ -309,8 -309,6 +309,8 @@@
    */
   static DEFINE_SPINLOCK(task_group_lock);
   
+ +#ifdef CONFIG_FAIR_GROUP_SCHED
+ +
   #ifdef CONFIG_SMP
   static int root_task_group_empty(void)
   {
@@@ -318,6 -316,7 +318,6 @@@
   }
   #endif
   
- -#ifdef CONFIG_FAIR_GROUP_SCHED
   #ifdef CONFIG_USER_SCHED
   # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
   #else /* !CONFIG_USER_SCHED */
@@@ -535,12 -534,14 +535,12 @@@ struct rq 
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
   #ifdef CONFIG_NO_HZ
- -      unsigned long last_tick_seen;
         unsigned char in_nohz_recently;
   #endif
         /* capture load from *all* tasks on this cpu: */
         struct load_weight load;
         unsigned long nr_load_updates;
         u64 nr_switches;
- -      u64 nr_migrations_in;
   
         struct cfs_rq cfs;
         struct rt_rq rt;
@@@ -589,8 -590,6 +589,8 @@@
   
         u64 rt_avg;
         u64 age_stamp;
+ +      u64 idle_stamp;
+ +      u64 avg_idle;
   #endif
   
         /* calc_load related fields */
@@@ -677,7 -676,6 +677,7 @@@ inline void update_rq_clock(struct rq *
   
   /**
    * runqueue_is_locked
+ + * @cpu: the processor in question.
    *
    * Returns true if the current cpu runqueue is locked.
    * This interface allows printk to be called with the runqueue lock
@@@ -772,7 -770,7 +772,7 @@@ sched_feat_write(struct file *filp, con
         if (!sched_feat_names[i])
                 return -EINVAL;
   
- -      filp->f_pos += cnt;
+ +      *ppos += cnt;
   
         return cnt;
   }
@@@ -814,7 -812,6 +814,7 @@@ const_debug unsigned int sysctl_sched_n
    * default: 0.25ms
    */
   unsigned int sysctl_sched_shares_ratelimit = 250000;
+ +unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
   
   /*
    * Inject some fuzzyness into changing the per-cpu group shares
@@@ -1615,7 -1612,7 +1615,7 @@@ static void update_group_shares_cpu(str
    */
   static int tg_shares_up(struct task_group *tg, void *data)
   {
- -      unsigned long weight, rq_weight = 0, shares = 0;
+ +      unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
         unsigned long *usd_rq_weight;
         struct sched_domain *sd = data;
         unsigned long flags;
@@@ -1631,7 -1628,6 +1631,7 @@@
                 weight = tg->cfs_rq[i]->load.weight;
                 usd_rq_weight[i] = weight;
   
+ +              rq_weight += weight;
                 /*
                  * If there are currently no tasks on the cpu pretend there
                  * is one of average load so that when a new task gets to
@@@ -1640,13 -1636,10 +1640,13 @@@
                 if (!weight)
                         weight = NICE_0_LOAD;
   
- -              rq_weight += weight;
+ +              sum_weight += weight;
                 shares += tg->cfs_rq[i]->shares;
         }
   
+ +      if (!rq_weight)
+ +              rq_weight = sum_weight;
+ +
         if ((!shares && rq_weight) || shares > tg->shares)
                 shares = tg->shares;
   
@@@ -1815,22 -1808,6 +1815,22 @@@ static void cfs_rq_set_shares(struct cf
   #endif
   
   static void calc_load_account_active(struct rq *this_rq);
+ +static void update_sysctl(void);
+ +static int get_update_sysctl_factor(void);
+ +
+ +static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+ +{
+ +      set_task_rq(p, cpu);
+ +#ifdef CONFIG_SMP
+ +      /*
+ +       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+ +       * successfully executed on another CPU. We must ensure that updates of
+ +       * per-task data have been completed by this moment.
+ +       */
+ +      smp_wmb();
+ +      task_thread_info(p)->cpu = cpu;
+ +#endif
+ +}
   
   #include "sched_stats.h"
   #include "sched_idletask.c"
@@@ -1988,6 -1965,20 +1988,6 @@@ inline int task_curr(const struct task_
         return cpu_curr(task_cpu(p)) == p;
   }
   
- -static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
- -{
- -      set_task_rq(p, cpu);
- -#ifdef CONFIG_SMP
- -      /*
- -       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
- -       * successfuly executed on another CPU. We must ensure that updates of
- -       * per-task data have been completed by this moment.
- -       */
- -      smp_wmb();
- -      task_thread_info(p)->cpu = cpu;
- -#endif
- -}
- -
   static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                        const struct sched_class *prev_class,
                                        int oldprio, int running)
@@@ -2000,39 -1991,6 +2000,39 @@@
                 p->sched_class->prio_changed(rq, p, oldprio, running);
   }
   
+ +/**
+ + * kthread_bind - bind a just-created kthread to a cpu.
+ + * @p: thread created by kthread_create().
+ + * @cpu: cpu (might not be online, must be possible) for @p to run on.
+ + *
+ + * Description: This function is equivalent to set_cpus_allowed(),
+ + * except that @cpu doesn't need to be online, and the thread must be
+ + * stopped (i.e., just returned from kthread_create()).
+ + *
+ + * Function lives here instead of kthread.c because it messes with
+ + * scheduler internals which require locking.
+ + */
+ +void kthread_bind(struct task_struct *p, unsigned int cpu)
+ +{
+ +      struct rq *rq = cpu_rq(cpu);
+ +      unsigned long flags;
+ +
+ +      /* Must have done schedule() in kthread() before we set_task_cpu */
+ +      if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ +              WARN_ON(1);
+ +              return;
+ +      }
+ +
+ +      spin_lock_irqsave(&rq->lock, flags);
+ +      update_rq_clock(rq);
+ +      set_task_cpu(p, cpu);
+ +      p->cpus_allowed = cpumask_of_cpu(cpu);
+ +      p->rt.nr_cpus_allowed = 1;
+ +      p->flags |= PF_THREAD_BOUND;
+ +      spin_unlock_irqrestore(&rq->lock, flags);
+ +}
+ +EXPORT_SYMBOL(kthread_bind);
+ +
   #ifdef CONFIG_SMP
   /*
    * Is this task likely cache-hot:
@@@ -2045,7 -2003,7 +2045,7 @@@ task_hot(struct task_struct *p, u64 now
         /*
          * Buddy candidates are cache hot:
          */
- -      if (sched_feat(CACHE_HOT_BUDDY) &&
+ +      if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
                         (&p->se == cfs_rq_of(&p->se)->next ||
                          &p->se == cfs_rq_of(&p->se)->last))
                 return 1;
@@@ -2067,13 -2025,30 +2067,13 @@@
   void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
   {
         int old_cpu = task_cpu(p);
- -      struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
         struct cfs_rq *old_cfsrq = task_cfs_rq(p),
                       *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
- -      u64 clock_offset;
- -
- -      clock_offset = old_rq->clock - new_rq->clock;
   
         trace_sched_migrate_task(p, new_cpu);
   
- -#ifdef CONFIG_SCHEDSTATS
- -      if (p->se.wait_start)
- -              p->se.wait_start -= clock_offset;
- -      if (p->se.sleep_start)
- -              p->se.sleep_start -= clock_offset;
- -      if (p->se.block_start)
- -              p->se.block_start -= clock_offset;
- -#endif
         if (old_cpu != new_cpu) {
                 p->se.nr_migrations++;
- -              new_rq->nr_migrations_in++;
- -#ifdef CONFIG_SCHEDSTATS
- -              if (task_hot(p, old_rq->clock, NULL))
- -                      schedstat_inc(p, se.nr_forced2_migrations);
- -#endif
                 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
                                      1, 1, NULL, 0);
         }
@@@ -2106,7 -2081,6 +2106,7 @@@ migrate_task(struct task_struct *p, in
          * it is sufficient to simply update the task's cpu field.
          */
         if (!p->se.on_rq && !task_running(rq, p)) {
+ +              update_rq_clock(rq);
                 set_task_cpu(p, dest_cpu);
                 return 0;
         }
@@@ -2314,14 -2288,6 +2314,14 @@@ void task_oncpu_function_call(struct ta
         preempt_enable();
   }
   
+ +#ifdef CONFIG_SMP
+ +static inline
+ +int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+ +{
+ +      return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+ +}
+ +#endif
+ +
   /***
    * try_to_wake_up - wake up a thread
    * @p: the to-be-woken-up thread
@@@ -2341,7 -2307,7 +2341,7 @@@ static int try_to_wake_up(struct task_s
   {
         int cpu, orig_cpu, this_cpu, success = 0;
         unsigned long flags;
- -      struct rq *rq;
+ +      struct rq *rq, *orig_rq;
   
         if (!sched_feat(SYNC_WAKEUPS))
                 wake_flags &= ~WF_SYNC;
@@@ -2349,7 -2315,7 +2349,7 @@@
         this_cpu = get_cpu();
   
         smp_wmb();
- -      rq = task_rq_lock(p, &flags);
+ +      rq = orig_rq = task_rq_lock(p, &flags);
         update_rq_clock(rq);
         if (!(p->state & state))
                 goto out;
@@@ -2373,15 -2339,13 +2373,15 @@@
         if (task_contributes_to_load(p))
                 rq->nr_uninterruptible--;
         p->state = TASK_WAKING;
- -      task_rq_unlock(rq, &flags);
+ +      __task_rq_unlock(rq);
   
- -      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+ +      cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
         if (cpu != orig_cpu)
                 set_task_cpu(p, cpu);
   
- -      rq = task_rq_lock(p, &flags);
+ +      rq = __task_rq_lock(p);
+ +      update_rq_clock(rq);
+ +
         WARN_ON(p->state != TASK_WAKING);
         cpu = task_cpu(p);
   
@@@ -2438,17 -2402,6 +2438,17 @@@ out_running
   #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
                 p->sched_class->task_wake_up(rq, p);
+ +
+ +      if (unlikely(rq->idle_stamp)) {
+ +              u64 delta = rq->clock - rq->idle_stamp;
+ +              u64 max = 2*sysctl_sched_migration_cost;
+ +
+ +              if (delta > max)
+ +                      rq->avg_idle = max;
+ +              else
+ +                      update_avg(&rq->avg_idle, delta);
+ +              rq->idle_stamp = 0;
+ +      }
   #endif
   out:
         task_rq_unlock(rq, &flags);
@@@ -2495,6 -2448,7 +2495,6 @@@ static void __sched_fork(struct task_st
         p->se.avg_overlap               = 0;
         p->se.start_runtime             = 0;
         p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
- -      p->se.avg_running               = 0;
   
   #ifdef CONFIG_SCHEDSTATS
         p->se.wait_start                        = 0;
@@@ -2516,6 -2470,7 +2516,6 @@@
         p->se.nr_failed_migrations_running      = 0;
         p->se.nr_failed_migrations_hot          = 0;
         p->se.nr_forced_migrations              = 0;
- -      p->se.nr_forced2_migrations             = 0;
   
         p->se.nr_wakeups                        = 0;
         p->se.nr_wakeups_sync                   = 0;
@@@ -2555,18 -2510,23 +2555,18 @@@ void sched_fork(struct task_struct *p, 
   
         __sched_fork(p);
   
- -      /*
- -       * Make sure we do not leak PI boosting priority to the child.
- -       */
- -      p->prio = current->normal_prio;
- -
         /*
          * Revert to default priority/policy on fork if requested.
          */
         if (unlikely(p->sched_reset_on_fork)) {
- -              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+ +              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
                         p->policy = SCHED_NORMAL;
- -
- -              if (p->normal_prio < DEFAULT_PRIO)
- -                      p->prio = DEFAULT_PRIO;
+ +                      p->normal_prio = p->static_prio;
+ +              }
   
                 if (PRIO_TO_NICE(p->static_prio) < 0) {
                         p->static_prio = NICE_TO_PRIO(0);
+ +                      p->normal_prio = p->static_prio;
                         set_load_weight(p);
                 }
   
@@@ -2577,19 -2537,11 +2577,19 @@@
                 p->sched_reset_on_fork = 0;
         }
   
+ +      /*
+ +       * Make sure we do not leak PI boosting priority to the child.
+ +       */
+ +      p->prio = current->normal_prio;
+ +
         if (!rt_prio(p->prio))
                 p->sched_class = &fair_sched_class;
   
+ +      if (p->sched_class->task_fork)
+ +              p->sched_class->task_fork(p);
+ +
   #ifdef CONFIG_SMP
- -      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+ +      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
   #endif
         set_task_cpu(p, cpu);
   
@@@ -2624,7 -2576,19 +2624,7 @@@ void wake_up_new_task(struct task_struc
         rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_RUNNING);
         update_rq_clock(rq);
- -
- -      p->prio = effective_prio(p);
- -
- -      if (!p->sched_class->task_new || !current->se.on_rq) {
- -              activate_task(rq, p, 0);
- -      } else {
- -              /*
- -               * Let the scheduling class do new task startup
- -               * management (if any):
- -               */
- -              p->sched_class->task_new(rq, p);
- -              inc_nr_running(rq);
- -      }
+ +      activate_task(rq, p, 0);
         trace_sched_wakeup_new(rq, p, 1);
         check_preempt_curr(rq, p, WF_FORK);
   #ifdef CONFIG_SMP
@@@ -2848,14 -2812,14 +2848,14 @@@ context_switch(struct rq *rq, struct ta
          */
         arch_start_context_switch(prev);
   
- -      if (unlikely(!mm)) {
+ +      if (likely(!mm)) {
                 next->active_mm = oldmm;
                 atomic_inc(&oldmm->mm_count);
                 enter_lazy_tlb(oldmm, next);
         } else
                 switch_mm(oldmm, mm, next);
   
- -      if (unlikely(!prev->mm)) {
+ +      if (likely(!prev->mm)) {
                 prev->active_mm = NULL;
                 rq->prev_mm = oldmm;
         }
@@@ -3017,6 -2981,15 +3017,6 @@@ static void calc_load_account_active(st
         }
   }
   
- -/*
- - * Externally visible per-cpu scheduler statistics:
- - * cpu_nr_migrations(cpu) - number of migrations into that cpu
- - */
- -u64 cpu_nr_migrations(int cpu)
- -{
- -      return cpu_rq(cpu)->nr_migrations_in;
- -}
- -
   /*
    * Update rq->cpu_load[] statistics. This function is usually called every
    * scheduler tick (TICK_NSEC).
@@@ -3139,7 -3112,7 +3139,7 @@@ out
   void sched_exec(void)
   {
         int new_cpu, this_cpu = get_cpu();
- -      new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+ +      new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
         put_cpu();
         if (new_cpu != this_cpu)
                 sched_migrate_task(current, new_cpu);
@@@ -3155,6 -3128,10 +3155,6 @@@ static void pull_task(struct rq *src_rq
         deactivate_task(src_rq, p, 0);
         set_task_cpu(p, this_cpu);
         activate_task(this_rq, p, 0);
- -      /*
- -       * Note that idle threads have a prio of MAX_PRIO, for this test
- -       * to be always true for them.
- -       */
         check_preempt_curr(this_rq, p, 0);
   }
   
@@@ -3677,7 -3654,6 +3677,7 @@@ static void update_group_power(struct s
   
   /**
    * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ + * @sd: The sched_domain whose statistics are to be updated.
    * @group: sched_group whose statistics are to be updated.
    * @this_cpu: Cpu for which load balance is currently performed.
    * @idle: Idle status of this_cpu
@@@ -4113,7 -4089,7 +4113,7 @@@ static int load_balance(int this_cpu, s
         unsigned long flags;
         struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
   
- -      cpumask_setall(cpus);
+ +      cpumask_copy(cpus, cpu_active_mask);
   
         /*
          * When power savings policy is enabled for the parent domain, idle
@@@ -4276,7 -4252,7 +4276,7 @@@ load_balance_newidle(int this_cpu, stru
         int all_pinned = 0;
         struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
   
- -      cpumask_setall(cpus);
+ +      cpumask_copy(cpus, cpu_active_mask);
   
         /*
          * When power savings policy is enabled for the parent domain, idle
@@@ -4416,11 -4392,6 +4416,11 @@@ static void idle_balance(int this_cpu, 
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
   
+ +      this_rq->idle_stamp = this_rq->clock;
+ +
+ +      if (this_rq->avg_idle < sysctl_sched_migration_cost)
+ +              return;
+ +
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
   
@@@ -4435,10 -4406,8 +4435,10 @@@
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
                         next_balance = sd->last_balance + interval;
- -              if (pulled_task)
+ +              if (pulled_task) {
+ +                      this_rq->idle_stamp = 0;
                         break;
+ +              }
         }
         if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
                 /*
@@@ -4673,7 -4642,7 +4673,7 @@@ int select_nohz_load_balancer(int stop_
                 cpumask_set_cpu(cpu, nohz.cpu_mask);
   
                 /* time for ilb owner also to sleep */
- -              if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
+ +              if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
                                 atomic_set(&nohz.load_balancer, -1);
                         return 0;
@@@ -5040,13 -5009,8 +5040,13 @@@ static void account_guest_time(struct t
         p->gtime = cputime_add(p->gtime, cputime);
   
         /* Add guest time to cpustat. */
- -      cpustat->user = cputime64_add(cpustat->user, tmp);
- -      cpustat->guest = cputime64_add(cpustat->guest, tmp);
+ +      if (TASK_NICE(p) > 0) {
+ +              cpustat->nice = cputime64_add(cpustat->nice, tmp);
+ +              cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
+ +      } else {
+ +              cpustat->user = cputime64_add(cpustat->user, tmp);
+ +              cpustat->guest = cputime64_add(cpustat->guest, tmp);
+ +      }
   }
   
   /*
@@@ -5161,86 -5125,60 +5161,86 @@@ void account_idle_ticks(unsigned long t
    * Use precise platform statistics if available:
    */
   #ifdef CONFIG_VIRT_CPU_ACCOUNTING
- -cputime_t task_utime(struct task_struct *p)
+ +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
   {
- -      return p->utime;
+ +      *ut = p->utime;
+ +      *st = p->stime;
   }
   
- -cputime_t task_stime(struct task_struct *p)
+ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
   {
- -      return p->stime;
+ +      struct task_cputime cputime;
+ +
+ +      thread_group_cputime(p, &cputime);
+ +
+ +      *ut = cputime.utime;
+ +      *st = cputime.stime;
   }
   #else
- -cputime_t task_utime(struct task_struct *p)
+ +
+ +#ifndef nsecs_to_cputime
+ +# define nsecs_to_cputime(__nsecs)    nsecs_to_jiffies(__nsecs)
+ +#endif
+ +
+ +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
   {
- -      clock_t utime = cputime_to_clock_t(p->utime),
- -              total = utime + cputime_to_clock_t(p->stime);
- -      u64 temp;
+ +      cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
   
         /*
          * Use CFS's precise accounting:
          */
- -      temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+ +      rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
   
         if (total) {
- -              temp *= utime;
+ +              u64 temp;
+ +
+ +              temp = (u64)(rtime * utime);
                 do_div(temp, total);
- -      }
- -      utime = (clock_t)temp;
+ +              utime = (cputime_t)temp;
+ +      } else
+ +              utime = rtime;
+ +
+ +      /*
+ +       * Compare with previous values, to keep monotonicity:
+ +       */
+ +      p->prev_utime = max(p->prev_utime, utime);
+ +      p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
   
- -      p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
- -      return p->prev_utime;
+ +      *ut = p->prev_utime;
+ +      *st = p->prev_stime;
   }
   
- -cputime_t task_stime(struct task_struct *p)
+ +/*
+ + * Must be called with siglock held.
+ + */
+ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
   {
- -      clock_t stime;
+ +      struct signal_struct *sig = p->signal;
+ +      struct task_cputime cputime;
+ +      cputime_t rtime, utime, total;
   
- -      /*
- -       * Use CFS's precise accounting. (we subtract utime from
- -       * the total, to make sure the total observed by userspace
- -       * grows monotonically - apps rely on that):
- -       */
- -      stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
- -                      cputime_to_clock_t(task_utime(p));
+ +      thread_group_cputime(p, &cputime);
   
- -      if (stime >= 0)
- -              p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+ +      total = cputime_add(cputime.utime, cputime.stime);
+ +      rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
   
- -      return p->prev_stime;
- -}
- -#endif
+ +      if (total) {
+ +              u64 temp;
   
- -inline cputime_t task_gtime(struct task_struct *p)
- -{
- -      return p->gtime;
+ +              temp = (u64)(rtime * cputime.utime);
+ +              do_div(temp, total);
+ +              utime = (cputime_t)temp;
+ +      } else
+ +              utime = rtime;
+ +
+ +      sig->prev_utime = max(sig->prev_utime, utime);
+ +      sig->prev_stime = max(sig->prev_stime,
+ +                            cputime_sub(rtime, sig->prev_utime));
+ +
+ +      *ut = sig->prev_utime;
+ +      *st = sig->prev_stime;
   }
+ +#endif
   
   /*
    * This function gets called by the timer code, with HZ frequency.
@@@ -5375,14 -5313,13 +5375,14 @@@ static inline void schedule_debug(struc
   #endif
   }
   
- -static void put_prev_task(struct rq *rq, struct task_struct *p)
+ +static void put_prev_task(struct rq *rq, struct task_struct *prev)
   {
- -      u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
+ +      if (prev->state == TASK_RUNNING) {
+ +              u64 runtime = prev->se.sum_exec_runtime;
   
- -      update_avg(&p->se.avg_running, runtime);
+ +              runtime -= prev->se.prev_sum_exec_runtime;
+ +              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
   
- -      if (p->state == TASK_RUNNING) {
                 /*
                  * In order to avoid avg_overlap growing stale when we are
                  * indeed overlapping and hence not getting put to sleep, grow
@@@ -5392,9 -5329,12 +5392,9 @@@
                  * correlates to the amount of cache footprint a task can
                  * build up.
                  */
- -              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
- -              update_avg(&p->se.avg_overlap, runtime);
- -      } else {
- -              update_avg(&p->se.avg_running, 0);
+ +              update_avg(&prev->se.avg_overlap, runtime);
         }
- -      p->sched_class->put_prev_task(rq, p);
+ +      prev->sched_class->put_prev_task(rq, prev);
   }
   
   /*
@@@ -5504,7 -5444,7 +5504,7 @@@ need_resched_nonpreemptible
   }
   EXPORT_SYMBOL(schedule);
   
- -#ifdef CONFIG_SMP
+ +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
   /*
    * Look out! "owner" is an entirely speculative pointer
    * access and not reliable.
@@@ -6198,14 -6138,22 +6198,14 @@@ __setscheduler(struct rq *rq, struct ta
         BUG_ON(p->se.on_rq);
   
         p->policy = policy;
- -      switch (p->policy) {
- -      case SCHED_NORMAL:
- -      case SCHED_BATCH:
- -      case SCHED_IDLE:
- -              p->sched_class = &fair_sched_class;
- -              break;
- -      case SCHED_FIFO:
- -      case SCHED_RR:
- -              p->sched_class = &rt_sched_class;
- -              break;
- -      }
- -
         p->rt_priority = prio;
         p->normal_prio = normal_prio(p);
         /* we are holding p->pi_lock already */
         p->prio = rt_mutex_getprio(p);
+ +      if (rt_prio(p->prio))
+ +              p->sched_class = &rt_sched_class;
+ +      else
+ +              p->sched_class = &fair_sched_class;
         set_load_weight(p);
   }
   
@@@ -6608,8 -6556,6 +6608,8 @@@ SYSCALL_DEFINE3(sched_setaffinity, pid_
   long sched_getaffinity(pid_t pid, struct cpumask *mask)
   {
         struct task_struct *p;
+ +      unsigned long flags;
+ +      struct rq *rq;
         int retval;
   
         get_online_cpus();
@@@ -6624,9 -6570,7 +6624,9 @@@
         if (retval)
                 goto out_unlock;
   
+ +      rq = task_rq_lock(p, &flags);
         cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+ +      task_rq_unlock(rq, &flags);
   
   out_unlock:
         read_unlock(&tasklist_lock);
@@@ -6772,6 -6716,9 +6772,6 @@@ EXPORT_SYMBOL(yield)
   /*
    * This task is about to go to sleep on IO. Increment rq->nr_iowait so
    * that process accounting knows that this is a task in IO wait state.
- - *
- - * But don't do that if it is a deliberate, throttling IO wait (this task
- - * has set its backing_dev_info: the queue against which it should throttle)
    */
   void __sched io_schedule(void)
   {
@@@ -6864,8 -6811,6 +6864,8 @@@ SYSCALL_DEFINE2(sched_rr_get_interval, 
   {
         struct task_struct *p;
         unsigned int time_slice;
+ +      unsigned long flags;
+ +      struct rq *rq;
         int retval;
         struct timespec t;
   
@@@ -6882,9 -6827,7 +6882,9 @@@
         if (retval)
                 goto out_unlock;
   
- -      time_slice = p->sched_class->get_rr_interval(p);
+ +      rq = task_rq_lock(p, &flags);
+ +      time_slice = p->sched_class->get_rr_interval(rq, p);
+ +      task_rq_unlock(rq, &flags);
   
         read_unlock(&tasklist_lock);
         jiffies_to_timespec(time_slice, &t);
@@@ -6958,7 -6901,7 +6958,7 @@@ void show_state_filter(unsigned long st
         /*
          * Only show locks if all tasks are dumped:
          */
- -      if (state_filter == -1)
+ +      if (!state_filter)
                 debug_show_all_locks();
   }
   
@@@ -6985,6 -6928,7 +6985,6 @@@ void __cpuinit init_idle(struct task_st
         __sched_fork(idle);
         idle->se.exec_start = sched_clock();
   
- -      idle->prio = idle->normal_prio = MAX_PRIO;
         cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
         __set_task_cpu(idle, cpu);
   
@@@ -7025,43 -6969,22 +7025,43 @@@ cpumask_var_t nohz_cpu_mask
    *
    * This idea comes from the SD scheduler of Con Kolivas:
    */
- -static inline void sched_init_granularity(void)
+ +static int get_update_sysctl_factor(void)
   {
- -      unsigned int factor = 1 + ilog2(num_online_cpus());
- -      const unsigned long limit = 200000000;
+ +      unsigned int cpus = min_t(int, num_online_cpus(), 8);
+ +      unsigned int factor;
+ +
+ +      switch (sysctl_sched_tunable_scaling) {
+ +      case SCHED_TUNABLESCALING_NONE:
+ +              factor = 1;
+ +              break;
+ +      case SCHED_TUNABLESCALING_LINEAR:
+ +              factor = cpus;
+ +              break;
+ +      case SCHED_TUNABLESCALING_LOG:
+ +      default:
+ +              factor = 1 + ilog2(cpus);
+ +              break;
+ +      }
   
- -      sysctl_sched_min_granularity *= factor;
- -      if (sysctl_sched_min_granularity > limit)
- -              sysctl_sched_min_granularity = limit;
+ +      return factor;
+ +}
   
- -      sysctl_sched_latency *= factor;
- -      if (sysctl_sched_latency > limit)
- -              sysctl_sched_latency = limit;
+ +static void update_sysctl(void)
+ +{
+ +      unsigned int factor = get_update_sysctl_factor();
   
- -      sysctl_sched_wakeup_granularity *= factor;
+ +#define SET_SYSCTL(name) \
+ +      (sysctl_##name = (factor) * normalized_sysctl_##name)
+ +      SET_SYSCTL(sched_min_granularity);
+ +      SET_SYSCTL(sched_latency);
+ +      SET_SYSCTL(sched_wakeup_granularity);
+ +      SET_SYSCTL(sched_shares_ratelimit);
+ +#undef SET_SYSCTL
+ +}
   
- -      sysctl_sched_shares_ratelimit *= factor;
+ +static inline void sched_init_granularity(void)
+ +{
+ +      update_sysctl();
   }
   
   #ifdef CONFIG_SMP
@@@ -7098,7 -7021,7 +7098,7 @@@ int set_cpus_allowed_ptr(struct task_st
         int ret = 0;
   
         rq = task_rq_lock(p, &flags);
- -      if (!cpumask_intersects(new_mask, cpu_online_mask)) {
+ +      if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                 ret = -EINVAL;
                 goto out;
         }
@@@ -7120,7 -7043,7 +7120,7 @@@
         if (cpumask_test_cpu(task_cpu(p), new_mask))
                 goto out;
   
- -      if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+ +      if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
                 /* Need help from migration thread: drop lock and wait. */
                 struct task_struct *mt = rq->migration_thread;
   
@@@ -7274,19 -7197,19 +7274,19 @@@ static void move_task_off_dead_cpu(int 
   
   again:
         /* Look for allowed, online CPU in same node. */
- -      for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+ +      for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
                 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                         goto move;
   
         /* Any allowed, online CPU? */
- -      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+ +      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
         if (dest_cpu < nr_cpu_ids)
                 goto move;
   
         /* No more Mr. Nice Guy. */
         if (dest_cpu >= nr_cpu_ids) {
                 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
- -              dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+ +              dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
   
                 /*
                  * Don't tell them about moving exiting tasks or
@@@ -7315,7 -7238,7 +7315,7 @@@ move
    */
   static void migrate_nr_uninterruptible(struct rq *rq_src)
   {
- -      struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
+ +      struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
         unsigned long flags;
   
         local_irq_save(flags);
@@@ -7449,16 -7372,17 +7449,16 @@@ static struct ctl_table sd_ctl_dir[] = 
                 .procname       = "sched_domain",
                 .mode           = 0555,
         },
- -      {0, },
+ +      {}
   };
   
   static struct ctl_table sd_ctl_root[] = {
         {
- -              .ctl_name       = CTL_KERN,
                 .procname       = "kernel",
                 .mode           = 0555,
                 .child          = sd_ctl_dir,
         },
- -      {0, },
+ +      {}
   };
   
   static struct ctl_table *sd_alloc_ctl_entry(int n)
@@@ -7568,7 -7492,7 +7568,7 @@@ static ctl_table *sd_alloc_ctl_cpu_tabl
   static struct ctl_table_header *sd_sysctl_header;
   static void register_sched_domain_sysctl(void)
   {
- -      int i, cpu_num = num_online_cpus();
+ +      int i, cpu_num = num_possible_cpus();
         struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
         char buf[32];
   
@@@ -7578,7 -7502,7 +7578,7 @@@
         if (entry == NULL)
                 return;
   
- -      for_each_online_cpu(i) {
+ +      for_each_possible_cpu(i) {
                 snprintf(buf, 32, "cpu%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
                 entry->mode = 0555;
@@@ -7708,6 -7632,7 +7708,6 @@@ migration_call(struct notifier_block *n
                 spin_lock_irq(&rq->lock);
                 update_rq_clock(rq);
                 deactivate_task(rq, rq->idle, 0);
- -              rq->idle->static_prio = MAX_PRIO;
                 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                 rq->idle->sched_class = &idle_sched_class;
                 migrate_dead_tasks(cpu);
@@@ -7781,16 -7706,6 +7781,16 @@@ early_initcall(migration_init)
   
   #ifdef CONFIG_SCHED_DEBUG
   
+ +static __read_mostly int sched_domain_debug_enabled;
+ +
+ +static int __init sched_domain_debug_setup(char *str)
+ +{
+ +      sched_domain_debug_enabled = 1;
+ +
+ +      return 0;
+ +}
+ +early_param("sched_debug", sched_domain_debug_setup);
+ +
   static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                   struct cpumask *groupmask)
   {
@@@ -7877,9 -7792,6 +7877,9 @@@ static void sched_domain_debug(struct s
         cpumask_var_t groupmask;
         int level = 0;
   
+ +      if (!sched_domain_debug_enabled)
+ +              return;
+ +
         if (!sd) {
                 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
                 return;
@@@ -7959,8 -7871,6 +7959,8 @@@ sd_parent_degenerate(struct sched_domai
   
   static void free_rootdomain(struct root_domain *rd)
   {
+ +      synchronize_sched();
+ +
         cpupri_cleanup(&rd->cpupri);
   
         free_cpumask_var(rd->rto_mask);
@@@ -8101,7 -8011,6 +8101,7 @@@ static cpumask_var_t cpu_isolated_map
   /* Setup the mask of cpus configured for isolated domains */
   static int __init isolated_cpu_setup(char *str)
   {
+ +      alloc_bootmem_cpumask_var(&cpu_isolated_map);
         cpulist_parse(str, cpu_isolated_map);
         return 1;
   }
@@@ -8286,14 -8195,14 +8286,14 @@@ enum s_alloc 
    */
   #ifdef CONFIG_SCHED_SMT
   static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
- static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
   
   static int
   cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
                  struct sched_group **sg, struct cpumask *unused)
   {
         if (sg)
-               *sg = &per_cpu(sched_group_cpus, cpu).sg;
+               *sg = &per_cpu(sched_groups, cpu).sg;
         return cpu;
   }
   #endif /* CONFIG_SCHED_SMT */
@@@ -8938,7 -8847,7 +8938,7 @@@ static int build_sched_domains(const st
         return __build_sched_domains(cpu_map, NULL);
   }
   
- -static struct cpumask *doms_cur;      /* current sched domains */
+ +static cpumask_var_t *doms_cur;       /* current sched domains */
   static int ndoms_cur;         /* number of sched domains in 'doms_cur' */
   static struct sched_domain_attr *dattr_cur;
                                 /* attribues of custom domains in 'doms_cur' */
@@@ -8960,31 -8869,6 +8960,31 @@@ int __attribute__((weak)) arch_update_c
         return 0;
   }
   
+ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
+ +{
+ +      int i;
+ +      cpumask_var_t *doms;
+ +
+ +      doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
+ +      if (!doms)
+ +              return NULL;
+ +      for (i = 0; i < ndoms; i++) {
+ +              if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
+ +                      free_sched_domains(doms, i);
+ +                      return NULL;
+ +              }
+ +      }
+ +      return doms;
+ +}
+ +
+ +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
+ +{
+ +      unsigned int i;
+ +      for (i = 0; i < ndoms; i++)
+ +              free_cpumask_var(doms[i]);
+ +      kfree(doms);
+ +}
+ +
   /*
    * Set up scheduler domains and groups. Callers must hold the hotplug lock.
    * For now this just excludes isolated cpus, but could be used to
@@@ -8996,12 -8880,12 +8996,12 @@@ static int arch_init_sched_domains(cons
   
         arch_update_cpu_topology();
         ndoms_cur = 1;
- -      doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
+ +      doms_cur = alloc_sched_domains(ndoms_cur);
         if (!doms_cur)
- -              doms_cur = fallback_doms;
- -      cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
+ +              doms_cur = &fallback_doms;
+ +      cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
         dattr_cur = NULL;
- -      err = build_sched_domains(doms_cur);
+ +      err = build_sched_domains(doms_cur[0]);
         register_sched_domain_sysctl();
   
         return err;
@@@ -9051,19 -8935,19 +9051,19 @@@ static int dattrs_equal(struct sched_do
    * doms_new[] to the current sched domain partitioning, doms_cur[].
    * It destroys each deleted domain and builds each new domain.
    *
- - * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
+ + * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
    * The masks don't intersect (don't overlap.) We should setup one
    * sched domain for each mask. CPUs not in any of the cpumasks will
    * not be load balanced. If the same cpumask appears both in the
    * current 'doms_cur' domains and in the new 'doms_new', we can leave
    * it as it is.
    *
- - * The passed in 'doms_new' should be kmalloc'd. This routine takes
- - * ownership of it and will kfree it when done with it. If the caller
- - * failed the kmalloc call, then it can pass in doms_new == NULL &&
- - * ndoms_new == 1, and partition_sched_domains() will fallback to
- - * the single partition 'fallback_doms', it also forces the domains
- - * to be rebuilt.
+ + * The passed in 'doms_new' should be allocated using
+ + * alloc_sched_domains.  This routine takes ownership of it and will
+ + * free_sched_domains it when done with it. If the caller failed the
+ + * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
+ + * and partition_sched_domains() will fallback to the single partition
+ + * 'fallback_doms', it also forces the domains to be rebuilt.
    *
    * If doms_new == NULL it will be replaced with cpu_online_mask.
    * ndoms_new == 0 is a special case for destroying existing domains,
@@@ -9071,7 -8955,8 +9071,7 @@@
    *
    * Call with hotplug lock held
    */
- -/* FIXME: Change to struct cpumask *doms_new[] */
- -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
+ +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
                              struct sched_domain_attr *dattr_new)
   {
         int i, j, n;
@@@ -9090,40 -8975,40 +9090,40 @@@
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
                 for (j = 0; j < n && !new_topology; j++) {
- -                      if (cpumask_equal(&doms_cur[i], &doms_new[j])
+ +                      if (cpumask_equal(doms_cur[i], doms_new[j])
                             && dattrs_equal(dattr_cur, i, dattr_new, j))
                                 goto match1;
                 }
                 /* no match - a current sched domain not in new doms_new[] */
- -              detach_destroy_domains(doms_cur + i);
+ +              detach_destroy_domains(doms_cur[i]);
   match1:
                 ;
         }
   
         if (doms_new == NULL) {
                 ndoms_cur = 0;
- -              doms_new = fallback_doms;
- -              cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
+ +              doms_new = &fallback_doms;
+ +              cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
                 WARN_ON_ONCE(dattr_new);
         }
   
         /* Build new domains */
         for (i = 0; i < ndoms_new; i++) {
                 for (j = 0; j < ndoms_cur && !new_topology; j++) {
- -                      if (cpumask_equal(&doms_new[i], &doms_cur[j])
+ +                      if (cpumask_equal(doms_new[i], doms_cur[j])
                             && dattrs_equal(dattr_new, i, dattr_cur, j))
                                 goto match2;
                 }
                 /* no match - add a new doms_new */
- -              __build_sched_domains(doms_new + i,
+ +              __build_sched_domains(doms_new[i],
                                         dattr_new ? dattr_new + i : NULL);
   match2:
                 ;
         }
   
         /* Remember the new sched domains */
- -      if (doms_cur != fallback_doms)
- -              kfree(doms_cur);
+ +      if (doms_cur != &fallback_doms)
+ +              free_sched_domains(doms_cur, ndoms_cur);
         kfree(dattr_cur);       /* kfree(NULL) is safe */
         doms_cur = doms_new;
         dattr_cur = dattr_new;
@@@ -9234,10 -9119,8 +9234,10 @@@ static int update_sched_domains(struct 
         switch (action) {
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
- -      case CPU_DEAD:
- -      case CPU_DEAD_FROZEN:
+ +      case CPU_DOWN_PREPARE:
+ +      case CPU_DOWN_PREPARE_FROZEN:
+ +      case CPU_DOWN_FAILED:
+ +      case CPU_DOWN_FAILED_FROZEN:
                 partition_sched_domains(1, NULL, NULL);
                 return NOTIFY_OK;
   
@@@ -9284,7 -9167,7 +9284,7 @@@ void __init sched_init_smp(void
   #endif
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
- -      arch_init_sched_domains(cpu_online_mask);
+ +      arch_init_sched_domains(cpu_active_mask);
         cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
         if (cpumask_empty(non_isolated_cpus))
                 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@@ -9447,6 -9330,10 +9447,6 @@@ void __init sched_init(void
   #ifdef CONFIG_CPUMASK_OFFSTACK
         alloc_size += num_possible_cpus() * cpumask_size();
   #endif
- -      /*
- -       * As sched_init() is called before page_alloc is setup,
- -       * we use alloc_bootmem().
- -       */
         if (alloc_size) {
                 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
   
@@@ -9583,7 -9470,7 +9583,7 @@@
   #elif defined CONFIG_USER_SCHED
                 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
                 init_tg_rt_entry(&init_task_group,
-                               &per_cpu(init_rt_rq, i),
+                               &per_cpu(init_rt_rq_var, i),
                                 &per_cpu(init_sched_rt_entity, i), i, 1,
                                 root_task_group.rt_se[i]);
   #endif
@@@ -9601,8 -9488,6 +9601,8 @@@
                 rq->cpu = i;
                 rq->online = 0;
                 rq->migration_thread = NULL;
+ +              rq->idle_stamp = 0;
+ +              rq->avg_idle = 2*sysctl_sched_migration_cost;
                 INIT_LIST_HEAD(&rq->migration_queue);
                 rq_attach_root(rq, &def_root_domain);
   #endif
@@@ -9646,15 -9531,13 +9646,15 @@@
         current->sched_class = &fair_sched_class;
   
         /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
- -      alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+ +      zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
   #ifdef CONFIG_SMP
   #ifdef CONFIG_NO_HZ
- -      alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+ +      zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
         alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
   #endif
- -      alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+ +      /* May be allocated at isolcpus cmdline parse time */
+ +      if (cpu_isolated_map == NULL)
+ +              zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
   #endif /* SMP */
   
         perf_event_init();
@@@ -9848,15 -9731,13 +9848,15 @@@ int alloc_fair_sched_group(struct task_
                 se = kzalloc_node(sizeof(struct sched_entity),
                                   GFP_KERNEL, cpu_to_node(i));
                 if (!se)
- -                      goto err;
+ +                      goto err_free_rq;
   
                 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
         }
   
         return 1;
   
+ + err_free_rq:
+ +      kfree(cfs_rq);
    err:
         return 0;
   }
@@@ -9938,15 -9819,13 +9938,15 @@@ int alloc_rt_sched_group(struct task_gr
                 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
                                      GFP_KERNEL, cpu_to_node(i));
                 if (!rt_se)
- -                      goto err;
+ +                      goto err_free_rq;
   
                 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
         }
   
         return 1;
   
+ + err_free_rq:
+ +      kfree(rt_rq);
    err:
         return 0;
   }
@@@ -10988,7 -10867,6 +10988,7 @@@ void synchronize_sched_expedited(void
                 spin_unlock_irqrestore(&rq->lock, flags);
         }
         rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+ +      synchronize_sched_expedited_count++;
         mutex_unlock(&rcu_sched_expedited_mutex);
         put_online_cpus();
         if (need_full_sync)
diff --combined kernel/softirq.c

index 21939d9e830e22b040f15e5562c97b0a8b8a9e47,0740dfd55c51f8b6ffde0ae0c870eb9b51b36373..a09502e2ef758721917ab537e345da0036197628
--- 1/kernel/softirq.c
--- 2/kernel/softirq.c
+++ b/kernel/softirq.c
@@@ -302,9 -302,9 +302,9 @@@ void irq_exit(void
         if (!in_interrupt() && local_softirq_pending())
                 invoke_softirq();
   
+ +      rcu_irq_exit();
   #ifdef CONFIG_NO_HZ
         /* Make sure that timer wheel updates are propagated */
- -      rcu_irq_exit();
         if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
                 tick_nohz_stop_sched_tick(0);
   #endif
@@@ -697,7 -697,7 +697,7 @@@ void __init softirq_init(void
         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
   }
   
- static int ksoftirqd(void * __bind_cpu)
+ static int run_ksoftirqd(void * __bind_cpu)
   {
         set_current_state(TASK_INTERRUPTIBLE);
   
@@@ -810,7 -810,7 +810,7 @@@ static int __cpuinit cpu_callback(struc
         switch (action) {
         case CPU_UP_PREPARE:
         case CPU_UP_PREPARE_FROZEN:
-               p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+               p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                 if (IS_ERR(p)) {
                         printk("ksoftirqd for %i failed\n", hotcpu);
                         return NOTIFY_BAD;
diff --combined kernel/trace/trace.c

index 88bd9ae2a9ed9b71b881c665f7cdb4eb722cf309,85a5ed70b5b237326732593248e98066daf2c9d9..c82dfd92fdfd8d663b6146ea6a584d42ea715508
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -86,17 -86,17 +86,17 @@@ static int dummy_set_flag(u32 old_flags
    */
   static int tracing_disabled = 1;
   
- DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
   
   static inline void ftrace_disable_cpu(void)
   {
         preempt_disable();
-       local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
   }
   
   static inline void ftrace_enable_cpu(void)
   {
-       local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
         preempt_enable();
   }
   
@@@ -129,7 -129,7 +129,7 @@@ static int tracing_set_tracer(const cha
   static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
   static char *default_bootup_tracer;
   
- -static int __init set_ftrace(char *str)
+ +static int __init set_cmdline_ftrace(char *str)
   {
         strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
         default_bootup_tracer = bootup_tracer_buf;
@@@ -137,7 -137,7 +137,7 @@@
         ring_buffer_expanded = 1;
         return 1;
   }
- -__setup("ftrace=", set_ftrace);
+ +__setup("ftrace=", set_cmdline_ftrace);
   
   static int __init set_ftrace_dump_on_oops(char *str)
   {
@@@ -203,7 -203,7 +203,7 @@@ cycle_t ftrace_now(int cpu
    */
   static struct trace_array     max_tr;
   
- static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+ static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
   
   /* tracer_enabled is used to toggle activation of a tracer */
   static int                    tracer_enabled = 1;
@@@ -1085,7 -1085,7 +1085,7 @@@ trace_function(struct trace_array *tr
         struct ftrace_entry *entry;
   
         /* If we are reading the ring buffer, don't trace */
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                 return;
   
         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@@ -1363,6 -1363,9 +1363,6 @@@ int trace_array_vprintk(struct trace_ar
         __raw_spin_lock(&trace_buf_lock);
         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
   
- -      len = min(len, TRACE_BUF_SIZE-1);
- -      trace_buf[len] = 0;
- -
         size = sizeof(*entry) + len + 1;
         buffer = tr->buffer;
         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@@ -1370,10 -1373,10 +1370,10 @@@
         if (!event)
                 goto out_unlock;
         entry = ring_buffer_event_data(event);
- -      entry->ip                       = ip;
+ +      entry->ip = ip;
   
         memcpy(&entry->buf, trace_buf, len);
- -      entry->buf[len] = 0;
+ +      entry->buf[len] = '\0';
         if (!filter_check_discard(call, entry, buffer, event))
                 ring_buffer_unlock_commit(buffer, event);
   
@@@ -1390,7 -1393,7 +1390,7 @@@
   
   int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
   {
- -      return trace_array_printk(&global_trace, ip, fmt, args);
+ +      return trace_array_vprintk(&global_trace, ip, fmt, args);
   }
   EXPORT_SYMBOL_GPL(trace_vprintk);
   
@@@ -1512,8 -1515,6 +1512,8 @@@ static void *s_next(struct seq_file *m
         int i = (int)*pos;
         void *ent;
   
+ +      WARN_ON_ONCE(iter->leftover);
+ +
         (*pos)++;
   
         /* can't go backwards */
@@@ -1612,16 -1613,8 +1612,16 @@@ static void *s_start(struct seq_file *m
                         ;
   
         } else {
- -              l = *pos - 1;
- -              p = s_next(m, p, &l);
+ +              /*
+ +               * If we overflowed the seq_file before, then we want
+ +               * to just reuse the trace_seq buffer again.
+ +               */
+ +              if (iter->leftover)
+ +                      p = iter;
+ +              else {
+ +                      l = *pos - 1;
+ +                      p = s_next(m, p, &l);
+ +              }
         }
   
         trace_event_read_lock();
@@@ -1929,7 -1922,6 +1929,7 @@@ static enum print_line_t print_trace_li
   static int s_show(struct seq_file *m, void *v)
   {
         struct trace_iterator *iter = v;
+ +      int ret;
   
         if (iter->ent == NULL) {
                 if (iter->tr) {
@@@ -1949,27 -1941,9 +1949,27 @@@
                         if (!(trace_flags & TRACE_ITER_VERBOSE))
                                 print_func_help_header(m);
                 }
+ +      } else if (iter->leftover) {
+ +              /*
+ +               * If we filled the seq_file buffer earlier, we
+ +               * want to just show it now.
+ +               */
+ +              ret = trace_print_seq(m, &iter->seq);
+ +
+ +              /* ret should this time be zero, but you never know */
+ +              iter->leftover = ret;
+ +
         } else {
                 print_trace_line(iter);
- -              trace_print_seq(m, &iter->seq);
+ +              ret = trace_print_seq(m, &iter->seq);
+ +              /*
+ +               * If we overflow the seq_file buffer, then it will
+ +               * ask us for this data again at start up.
+ +               * Use that instead.
+ +               *  ret is 0 if seq_file write succeeded.
+ +               *        -1 otherwise.
+ +               */
+ +              iter->leftover = ret;
         }
   
         return 0;
@@@ -2466,7 -2440,7 +2466,7 @@@ tracing_trace_options_write(struct fil
                         return ret;
         }
   
- -      filp->f_pos += cnt;
+ +      *ppos += cnt;
   
         return cnt;
   }
@@@ -2608,7 -2582,7 +2608,7 @@@ tracing_ctrl_write(struct file *filp, c
         }
         mutex_unlock(&trace_types_lock);
   
- -      filp->f_pos += cnt;
+ +      *ppos += cnt;
   
         return cnt;
   }
@@@ -2790,7 -2764,7 +2790,7 @@@ tracing_set_trace_write(struct file *fi
         if (err)
                 return err;
   
- -      filp->f_pos += ret;
+ +      *ppos += ret;
   
         return ret;
   }
@@@ -2923,10 -2897,6 +2923,10 @@@ static int tracing_release_pipe(struct 
         else
                 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
   
+ +
+ +      if (iter->trace->pipe_close)
+ +              iter->trace->pipe_close(iter);
+ +
         mutex_unlock(&trace_types_lock);
   
         free_cpumask_var(iter->started);
@@@ -3329,7 -3299,7 +3329,7 @@@ tracing_entries_write(struct file *filp
                 }
         }
   
- -      filp->f_pos += cnt;
+ +      *ppos += cnt;
   
         /* If check pages failed, return ENOMEM */
         if (tracing_disabled)
@@@ -3364,6 -3334,7 +3364,6 @@@ tracing_mark_write(struct file *filp, c
                                         size_t cnt, loff_t *fpos)
   {
         char *buf;
- -      char *end;
   
         if (tracing_disabled)
                 return -EINVAL;
@@@ -3371,7 -3342,7 +3371,7 @@@
         if (cnt > TRACE_BUF_SIZE)
                 cnt = TRACE_BUF_SIZE;
   
- -      buf = kmalloc(cnt + 1, GFP_KERNEL);
+ +      buf = kmalloc(cnt + 2, GFP_KERNEL);
         if (buf == NULL)
                 return -ENOMEM;
   
@@@ -3379,13 -3350,14 +3379,13 @@@
                 kfree(buf);
                 return -EFAULT;
         }
+ +      if (buf[cnt-1] != '\n') {
+ +              buf[cnt] = '\n';
+ +              buf[cnt+1] = '\0';
+ +      } else
+ +              buf[cnt] = '\0';
   
- -      /* Cut from the first nil or newline. */
- -      buf[cnt] = '\0';
- -      end = strchr(buf, '\n');
- -      if (end)
- -              *end = '\0';
- -
- -      cnt = mark_printk("%s\n", buf);
+ +      cnt = mark_printk("%s", buf);
         kfree(buf);
         *fpos += cnt;
   
@@@ -3758,7 -3730,7 +3758,7 @@@ tracing_stats_read(struct file *filp, c
   
         s = kmalloc(sizeof(*s), GFP_KERNEL);
         if (!s)
- -              return ENOMEM;
+ +              return -ENOMEM;
   
         trace_seq_init(s);
   
@@@ -4454,7 -4426,7 +4454,7 @@@ __init static int tracer_alloc_buffers(
         /* Allocate the first page for all buffers */
         for_each_tracing_cpu(i) {
                 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_data, i);
+               max_tr.data[i] = &per_cpu(max_tr_data, i);
         }
   
         trace_init_cmdlines();
diff --combined kernel/trace/trace.h

index 7fa33cab69629299a9d4470fbf13db81d20e3c94,542f45554883bcf3a1a75f79e2804b6c07e7d8ab..a52bed2eedd848ec5e7c249d479baa8d14a98ad6
--- 1/kernel/trace/trace.h
--- 2/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@@ -11,7 -11,6 +11,7 @@@
   #include <linux/ftrace.h>
   #include <trace/boot.h>
   #include <linux/kmemtrace.h>
+ +#include <linux/hw_breakpoint.h>
   
   #include <linux/trace_seq.h>
   #include <linux/ftrace_event.h>
@@@ -38,7 -37,6 +38,7 @@@ enum trace_type 
         TRACE_KMEM_ALLOC,
         TRACE_KMEM_FREE,
         TRACE_BLK,
+ +      TRACE_KSYM,
   
         __TRACE_LAST_TYPE,
   };
@@@ -100,32 -98,9 +100,32 @@@ struct syscall_trace_enter 
   struct syscall_trace_exit {
         struct trace_entry      ent;
         int                     nr;
- -      unsigned long           ret;
+ +      long                    ret;
   };
   
+ +struct kprobe_trace_entry {
+ +      struct trace_entry      ent;
+ +      unsigned long           ip;
+ +      int                     nargs;
+ +      unsigned long           args[];
+ +};
+ +
+ +#define SIZEOF_KPROBE_TRACE_ENTRY(n)                  \
+ +      (offsetof(struct kprobe_trace_entry, args) +    \
+ +      (sizeof(unsigned long) * (n)))
+ +
+ +struct kretprobe_trace_entry {
+ +      struct trace_entry      ent;
+ +      unsigned long           func;
+ +      unsigned long           ret_ip;
+ +      int                     nargs;
+ +      unsigned long           args[];
+ +};
+ +
+ +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)                       \
+ +      (offsetof(struct kretprobe_trace_entry, args) + \
+ +      (sizeof(unsigned long) * (n)))
+ +
   /*
    * trace_flag_type is an enumeration that holds different
    * states when a trace occurs. These are:
@@@ -234,7 -209,6 +234,7 @@@ extern void __ftrace_bad_type(void)
                           TRACE_KMEM_ALLOC);    \
                 IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                           TRACE_KMEM_FREE);     \
+ +              IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
                 __ftrace_bad_type();                                    \
         } while (0)
   
@@@ -272,7 -246,6 +272,7 @@@ struct tracer_flags 
    * @pipe_open: called when the trace_pipe file is opened
    * @wait_pipe: override how the user waits for traces on trace_pipe
    * @close: called when the trace file is released
+ + * @pipe_close: called when the trace_pipe file is released
    * @read: override the default read callback on trace_pipe
    * @splice_read: override the default splice_read callback on trace_pipe
    * @selftest: selftest to run on boot (see trace_selftest.c)
@@@ -291,7 -264,6 +291,7 @@@ struct tracer 
         void                    (*pipe_open)(struct trace_iterator *iter);
         void                    (*wait_pipe)(struct trace_iterator *iter);
         void                    (*close)(struct trace_iterator *iter);
+ +      void                    (*pipe_close)(struct trace_iterator *iter);
         ssize_t                 (*read)(struct trace_iterator *iter,
                                         struct file *filp, char __user *ubuf,
                                         size_t cnt, loff_t *ppos);
@@@ -392,8 -364,6 +392,8 @@@ int register_tracer(struct tracer *type
   void unregister_tracer(struct tracer *type);
   int is_tracing_stopped(void);
   
+ +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+ +
   extern unsigned long nsecs_to_usecs(unsigned long nsecs);
   
   #ifdef CONFIG_TRACER_MAX_TRACE
@@@ -443,7 -413,7 +443,7 @@@ extern int DYN_FTRACE_TEST_NAME(void)
   
   extern int ring_buffer_expanded;
   extern bool tracing_selftest_disabled;
- DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
+ DECLARE_PER_CPU(int, ftrace_cpu_disabled);
   
   #ifdef CONFIG_FTRACE_STARTUP_TEST
   extern int trace_selftest_startup_function(struct tracer *trace,
@@@ -468,8 -438,6 +468,8 @@@ extern int trace_selftest_startup_branc
                                          struct trace_array *tr);
   extern int trace_selftest_startup_hw_branches(struct tracer *trace,
                                               struct trace_array *tr);
+ +extern int trace_selftest_startup_ksym(struct tracer *trace,
+ +                                       struct trace_array *tr);
   #endif /* CONFIG_FTRACE_STARTUP_TEST */
   
   extern void *head_page(struct trace_array_cpu *data);
@@@ -515,6 -483,10 +515,6 @@@ static inline int ftrace_graph_addr(uns
         return 0;
   }
   #else
- -static inline int ftrace_trace_addr(unsigned long addr)
- -{
- -      return 1;
- -}
   static inline int ftrace_graph_addr(unsigned long addr)
   {
         return 1;
@@@ -528,12 -500,12 +528,12 @@@ print_graph_function(struct trace_itera
   }
   #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
   
- -extern struct pid *ftrace_pid_trace;
+ +extern struct list_head ftrace_pids;
   
   #ifdef CONFIG_FUNCTION_TRACER
   static inline int ftrace_trace_task(struct task_struct *task)
   {
- -      if (!ftrace_pid_trace)
+ +      if (list_empty(&ftrace_pids))
                 return 1;
   
         return test_tsk_trace_trace(task);
@@@ -715,6 -687,7 +715,6 @@@ struct event_filter 
         int                     n_preds;
         struct filter_pred      **preds;
         char                    *filter_string;
- -      bool                    no_reset;
   };
   
   struct event_subsystem {
@@@ -726,40 -699,22 +726,40 @@@
   };
   
   struct filter_pred;
+ +struct regex;
   
   typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
                                  int val1, int val2);
   
+ +typedef int (*regex_match_func)(char *str, struct regex *r, int len);
+ +
+ +enum regex_type {
+ +      MATCH_FULL = 0,
+ +      MATCH_FRONT_ONLY,
+ +      MATCH_MIDDLE_ONLY,
+ +      MATCH_END_ONLY,
+ +};
+ +
+ +struct regex {
+ +      char                    pattern[MAX_FILTER_STR_VAL];
+ +      int                     len;
+ +      int                     field_len;
+ +      regex_match_func        match;
+ +};
+ +
   struct filter_pred {
- -      filter_pred_fn_t fn;
- -      u64 val;
- -      char str_val[MAX_FILTER_STR_VAL];
- -      int str_len;
- -      char *field_name;
- -      int offset;
- -      int not;
- -      int op;
- -      int pop_n;
+ +      filter_pred_fn_t        fn;
+ +      u64                     val;
+ +      struct regex            regex;
+ +      char                    *field_name;
+ +      int                     offset;
+ +      int                     not;
+ +      int                     op;
+ +      int                     pop_n;
   };
   
+ +extern enum regex_type
+ +filter_parse_regex(char *buff, int len, char **search, int *not);
   extern void print_event_filter(struct ftrace_event_call *call,
                                struct trace_seq *s);
   extern int apply_event_filter(struct ftrace_event_call *call,
@@@ -775,8 -730,7 +775,8 @@@ filter_check_discard(struct ftrace_even
                      struct ring_buffer *buffer,
                      struct ring_buffer_event *event)
   {
- -      if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+ +      if (unlikely(call->filter_active) &&
+ +          !filter_match_preds(call->filter, rec)) {
                 ring_buffer_discard_commit(buffer, event);
                 return 1;
         }
diff --combined kernel/trace/trace_functions_graph.c

index a43d009c561a28bcbf1078626e086c63bf475d5e,90a6daa10962432e9bb40ff439a26d3add840fe9..b1342c5d37cfb821cfb96fcfd610cb95cdfb1082
--- 1/kernel/trace/trace_functions_graph.c
--- 2/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@@ -14,20 -14,9 +14,20 @@@
   #include "trace.h"
   #include "trace_output.h"
   
- -struct fgraph_data {
+ +struct fgraph_cpu_data {
         pid_t           last_pid;
         int             depth;
+ +      int             ignore;
+ +};
+ +
+ +struct fgraph_data {
+ +      struct fgraph_cpu_data          *cpu_data;
+ +
+ +      /* Place to preserve last processed entry. */
+ +      struct ftrace_graph_ent_entry   ent;
+ +      struct ftrace_graph_ret_entry   ret;
+ +      int                             failed;
+ +      int                             cpu;
   };
   
   #define TRACE_GRAPH_INDENT    2
@@@ -187,7 -176,7 +187,7 @@@ static int __trace_graph_entry(struct t
         struct ring_buffer *buffer = tr->buffer;
         struct ftrace_graph_ent_entry *entry;
   
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                 return 0;
   
         event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@@ -251,7 -240,7 +251,7 @@@ static void __trace_graph_return(struc
         struct ring_buffer *buffer = tr->buffer;
         struct ftrace_graph_ret_entry *entry;
   
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                 return;
   
         event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@@ -395,7 -384,7 +395,7 @@@ verif_pid(struct trace_seq *s, pid_t pi
         if (!data)
                 return TRACE_TYPE_HANDLED;
   
- -      last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+ +      last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
   
         if (*last_pid == pid)
                 return TRACE_TYPE_HANDLED;
@@@ -446,49 -435,26 +446,49 @@@ static struct ftrace_graph_ret_entry 
   get_return_for_leaf(struct trace_iterator *iter,
                 struct ftrace_graph_ent_entry *curr)
   {
- -      struct ring_buffer_iter *ring_iter;
+ +      struct fgraph_data *data = iter->private;
+ +      struct ring_buffer_iter *ring_iter = NULL;
         struct ring_buffer_event *event;
         struct ftrace_graph_ret_entry *next;
   
- -      ring_iter = iter->buffer_iter[iter->cpu];
+ +      /*
+ +       * If the previous output failed to write to the seq buffer,
+ +       * then we just reuse the data from before.
+ +       */
+ +      if (data && data->failed) {
+ +              curr = &data->ent;
+ +              next = &data->ret;
+ +      } else {
   
- -      /* First peek to compare current entry and the next one */
- -      if (ring_iter)
- -              event = ring_buffer_iter_peek(ring_iter, NULL);
- -      else {
- -      /* We need to consume the current entry to see the next one */
- -              ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
- -              event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
- -                                      NULL);
- -      }
+ +              ring_iter = iter->buffer_iter[iter->cpu];
+ +
+ +              /* First peek to compare current entry and the next one */
+ +              if (ring_iter)
+ +                      event = ring_buffer_iter_peek(ring_iter, NULL);
+ +              else {
+ +                      /*
+ +                       * We need to consume the current entry to see
+ +                       * the next one.
+ +                       */
+ +                      ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+ +                      event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+ +                                               NULL);
+ +              }
   
- -      if (!event)
- -              return NULL;
+ +              if (!event)
+ +                      return NULL;
+ +
+ +              next = ring_buffer_event_data(event);
   
- -      next = ring_buffer_event_data(event);
+ +              if (data) {
+ +                      /*
+ +                       * Save current and next entries for later reference
+ +                       * if the output fails.
+ +                       */
+ +                      data->ent = *curr;
+ +                      data->ret = *next;
+ +              }
+ +      }
   
         if (next->ent.type != TRACE_GRAPH_RET)
                 return NULL;
@@@ -674,7 -640,7 +674,7 @@@ print_graph_entry_leaf(struct trace_ite
   
         if (data) {
                 int cpu = iter->cpu;
- -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
   
                 /*
                  * Comments display at + 1 to depth. Since
@@@ -722,7 -688,7 +722,7 @@@ print_graph_entry_nested(struct trace_i
   
         if (data) {
                 int cpu = iter->cpu;
- -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
   
                 *depth = call->depth;
         }
@@@ -816,34 -782,19 +816,34 @@@ static enum print_line_
   print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
                         struct trace_iterator *iter)
   {
- -      int cpu = iter->cpu;
+ +      struct fgraph_data *data = iter->private;
         struct ftrace_graph_ent *call = &field->graph_ent;
         struct ftrace_graph_ret_entry *leaf_ret;
+ +      static enum print_line_t ret;
+ +      int cpu = iter->cpu;
   
         if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
                 return TRACE_TYPE_PARTIAL_LINE;
   
         leaf_ret = get_return_for_leaf(iter, field);
         if (leaf_ret)
- -              return print_graph_entry_leaf(iter, field, leaf_ret, s);
+ +              ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
         else
- -              return print_graph_entry_nested(iter, field, s, cpu);
+ +              ret = print_graph_entry_nested(iter, field, s, cpu);
   
+ +      if (data) {
+ +              /*
+ +               * If we failed to write our output, then we need to make
+ +               * note of it. Because we already consumed our entry.
+ +               */
+ +              if (s->full) {
+ +                      data->failed = 1;
+ +                      data->cpu = cpu;
+ +              } else
+ +                      data->failed = 0;
+ +      }
+ +
+ +      return ret;
   }
   
   static enum print_line_t
@@@ -859,7 -810,7 +859,7 @@@ print_graph_return(struct ftrace_graph_
   
         if (data) {
                 int cpu = iter->cpu;
- -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
   
                 /*
                  * Comments display at + 1 to depth. This is the
@@@ -922,7 -873,7 +922,7 @@@ print_graph_comment(struct trace_seq *s
         int i;
   
         if (data)
- -              depth = per_cpu_ptr(data, iter->cpu)->depth;
+ +              depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
   
         if (print_graph_prologue(iter, s, 0, 0))
                 return TRACE_TYPE_PARTIAL_LINE;
@@@ -990,33 -941,8 +990,33 @@@
   enum print_line_t
   print_graph_function(struct trace_iterator *iter)
   {
+ +      struct ftrace_graph_ent_entry *field;
+ +      struct fgraph_data *data = iter->private;
         struct trace_entry *entry = iter->ent;
         struct trace_seq *s = &iter->seq;
+ +      int cpu = iter->cpu;
+ +      int ret;
+ +
+ +      if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+ +              per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+ +              return TRACE_TYPE_HANDLED;
+ +      }
+ +
+ +      /*
+ +       * If the last output failed, there's a possibility we need
+ +       * to print out the missing entry which would never go out.
+ +       */
+ +      if (data && data->failed) {
+ +              field = &data->ent;
+ +              iter->cpu = data->cpu;
+ +              ret = print_graph_entry(field, s, iter);
+ +              if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+ +                      per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+ +                      ret = TRACE_TYPE_NO_CONSUME;
+ +              }
+ +              iter->cpu = cpu;
+ +              return ret;
+ +      }
   
         switch (entry->type) {
         case TRACE_GRAPH_ENT: {
@@@ -1026,7 -952,7 +1026,7 @@@
                  * sizeof(struct ftrace_graph_ent_entry) is very small,
                  * it can be safely saved at the stack.
                  */
- -              struct ftrace_graph_ent_entry *field, saved;
+ +              struct ftrace_graph_ent_entry saved;
                 trace_assign_type(field, entry);
                 saved = *field;
                 return print_graph_entry(&saved, s, iter);
@@@ -1104,54 -1030,31 +1104,54 @@@ static void print_graph_headers(struct 
   static void graph_trace_open(struct trace_iterator *iter)
   {
         /* pid and depth on the last trace processed */
- -      struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+ +      struct fgraph_data *data;
         int cpu;
   
+ +      iter->private = NULL;
+ +
+ +      data = kzalloc(sizeof(*data), GFP_KERNEL);
         if (!data)
- -              pr_warning("function graph tracer: not enough memory\n");
- -      else
- -              for_each_possible_cpu(cpu) {
- -                      pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
- -                      int *depth = &(per_cpu_ptr(data, cpu)->depth);
- -                      *pid = -1;
- -                      *depth = 0;
- -              }
+ +              goto out_err;
+ +
+ +      data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+ +      if (!data->cpu_data)
+ +              goto out_err_free;
+ +
+ +      for_each_possible_cpu(cpu) {
+ +              pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+ +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+ +              int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+ +              *pid = -1;
+ +              *depth = 0;
+ +              *ignore = 0;
+ +      }
   
         iter->private = data;
+ +
+ +      return;
+ +
+ + out_err_free:
+ +      kfree(data);
+ + out_err:
+ +      pr_warning("function graph tracer: not enough memory\n");
   }
   
   static void graph_trace_close(struct trace_iterator *iter)
   {
- -      free_percpu(iter->private);
+ +      struct fgraph_data *data = iter->private;
+ +
+ +      if (data) {
+ +              free_percpu(data->cpu_data);
+ +              kfree(data);
+ +      }
   }
   
   static struct tracer graph_trace __read_mostly = {
         .name           = "function_graph",
         .open           = graph_trace_open,
+ +      .pipe_open      = graph_trace_open,
         .close          = graph_trace_close,
+ +      .pipe_close     = graph_trace_close,
         .wait_pipe      = poll_wait_pipe,
         .init           = graph_trace_init,
         .reset          = graph_trace_reset,
diff --combined kernel/trace/trace_hw_branches.c

index 69543a905cd5f1c92086cb47576a21028db11a37,adaf7a39d0dcbaacad64d26bfbaab671b8d74fe8..7b97000745f5cc38601aff4101b58d9d8bb99b6c
--- 1/kernel/trace/trace_hw_branches.c
--- 2/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@@ -20,10 -20,10 +20,10 @@@
   
   #define BTS_BUFFER_SIZE (1 << 13)
   
- static DEFINE_PER_CPU(struct bts_tracer *, tracer);
- static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
+ static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
+ static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
   
- #define this_tracer per_cpu(tracer, smp_processor_id())
+ #define this_tracer per_cpu(hwb_tracer, smp_processor_id())
   
   static int trace_hw_branches_enabled __read_mostly;
   static int trace_hw_branches_suspended __read_mostly;
@@@ -32,12 -32,13 +32,13 @@@ static struct trace_array *hw_branch_tr
   
   static void bts_trace_init_cpu(int cpu)
   {
-       per_cpu(tracer, cpu) =
-               ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
-                                  NULL, (size_t)-1, BTS_KERNEL);
+       per_cpu(hwb_tracer, cpu) =
+               ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
+                                  BTS_BUFFER_SIZE, NULL, (size_t)-1,
+                                  BTS_KERNEL);
   
-       if (IS_ERR(per_cpu(tracer, cpu)))
-               per_cpu(tracer, cpu) = NULL;
+       if (IS_ERR(per_cpu(hwb_tracer, cpu)))
+               per_cpu(hwb_tracer, cpu) = NULL;
   }
   
   static int bts_trace_init(struct trace_array *tr)
@@@ -51,7 -52,7 +52,7 @@@
         for_each_online_cpu(cpu) {
                 bts_trace_init_cpu(cpu);
   
-               if (likely(per_cpu(tracer, cpu)))
+               if (likely(per_cpu(hwb_tracer, cpu)))
                         trace_hw_branches_enabled = 1;
         }
         trace_hw_branches_suspended = 0;
@@@ -67,9 -68,9 +68,9 @@@ static void bts_trace_reset(struct trac
   
         get_online_cpus();
         for_each_online_cpu(cpu) {
-               if (likely(per_cpu(tracer, cpu))) {
-                       ds_release_bts(per_cpu(tracer, cpu));
-                       per_cpu(tracer, cpu) = NULL;
+               if (likely(per_cpu(hwb_tracer, cpu))) {
+                       ds_release_bts(per_cpu(hwb_tracer, cpu));
+                       per_cpu(hwb_tracer, cpu) = NULL;
                 }
         }
         trace_hw_branches_enabled = 0;
@@@ -83,8 -84,8 +84,8 @@@ static void bts_trace_start(struct trac
   
         get_online_cpus();
         for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_resume_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_resume_bts(per_cpu(hwb_tracer, cpu));
         trace_hw_branches_suspended = 0;
         put_online_cpus();
   }
@@@ -95,8 -96,8 +96,8 @@@ static void bts_trace_stop(struct trace
   
         get_online_cpus();
         for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_suspend_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_suspend_bts(per_cpu(hwb_tracer, cpu));
         trace_hw_branches_suspended = 1;
         put_online_cpus();
   }
@@@ -114,16 -115,16 +115,16 @@@ static int __cpuinit bts_hotcpu_handler
                         bts_trace_init_cpu(cpu);
   
                         if (trace_hw_branches_suspended &&
-                           likely(per_cpu(tracer, cpu)))
-                               ds_suspend_bts(per_cpu(tracer, cpu));
+                           likely(per_cpu(hwb_tracer, cpu)))
+                               ds_suspend_bts(per_cpu(hwb_tracer, cpu));
                 }
                 break;
   
         case CPU_DOWN_PREPARE:
                 /* The notification is sent with interrupts enabled. */
-               if (likely(per_cpu(tracer, cpu))) {
-                       ds_release_bts(per_cpu(tracer, cpu));
-                       per_cpu(tracer, cpu) = NULL;
+               if (likely(per_cpu(hwb_tracer, cpu))) {
+                       ds_release_bts(per_cpu(hwb_tracer, cpu));
+                       per_cpu(hwb_tracer, cpu) = NULL;
                 }
         }
   
@@@ -165,7 -166,6 +166,7 @@@ void trace_hw_branch(u64 from, u64 to
         struct ftrace_event_call *call = &event_hw_branch;
         struct trace_array *tr = hw_branch_trace;
         struct ring_buffer_event *event;
+ +      struct ring_buffer *buf;
         struct hw_branch_entry *entry;
         unsigned long irq1;
         int cpu;
@@@ -181,8 -181,7 +182,8 @@@
         if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
                 goto out;
   
- -      event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
+ +      buf = tr->buffer;
+ +      event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
                                           sizeof(*entry), 0, 0);
         if (!event)
                 goto out;
@@@ -191,8 -190,8 +192,8 @@@
         entry->ent.type = TRACE_HW_BRANCHES;
         entry->from = from;
         entry->to   = to;
- -      if (!filter_check_discard(call, entry, tr->buffer, event))
- -              trace_buffer_unlock_commit(tr, event, 0, 0);
+ +      if (!filter_check_discard(call, entry, buf, event))
+ +              trace_buffer_unlock_commit(buf, event, 0, 0);
   
    out:
         atomic_dec(&tr->data[cpu]->disabled);
@@@ -258,8 -257,8 +259,8 @@@ static void trace_bts_prepare(struct tr
   
         get_online_cpus();
         for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_suspend_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_suspend_bts(per_cpu(hwb_tracer, cpu));
         /*
          * We need to collect the trace on the respective cpu since ftrace
          * implicitly adds the record for the current cpu.
@@@ -268,8 -267,8 +269,8 @@@
         on_each_cpu(trace_bts_cpu, iter->tr, 1);
   
         for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_resume_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_resume_bts(per_cpu(hwb_tracer, cpu));
         put_online_cpus();
   }
   
diff --combined mm/slab.c

index a6c9166996a9f389e72f3c71a2df7bacc937fe6f,211b1746c63ca7c723921b6b3ab6ba17a9a907c1..29b09599af7cb4510c37e02d873eab4e2cd52c79
--- 1/mm/slab.c
--- 2/mm/slab.c
+++ b/mm/slab.c
@@@ -604,26 -604,6 +604,26 @@@ static struct kmem_cache cache_cache = 
   
   #define BAD_ALIEN_MAGIC 0x01020304ul
   
+ +/*
+ + * chicken and egg problem: delay the per-cpu array allocation
+ + * until the general caches are up.
+ + */
+ +static enum {
+ +      NONE,
+ +      PARTIAL_AC,
+ +      PARTIAL_L3,
+ +      EARLY,
+ +      FULL
+ +} g_cpucache_up;
+ +
+ +/*
+ + * used by boot code to determine if it can use slab based allocator
+ + */
+ +int slab_is_available(void)
+ +{
+ +      return g_cpucache_up >= EARLY;
+ +}
+ +
   #ifdef CONFIG_LOCKDEP
   
   /*
@@@ -640,52 -620,40 +640,52 @@@
   static struct lock_class_key on_slab_l3_key;
   static struct lock_class_key on_slab_alc_key;
   
- -static inline void init_lock_keys(void)
- -
+ +static void init_node_lock_keys(int q)
   {
- -      int q;
         struct cache_sizes *s = malloc_sizes;
   
- -      while (s->cs_size != ULONG_MAX) {
- -              for_each_node(q) {
- -                      struct array_cache **alc;
- -                      int r;
- -                      struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
- -                      if (!l3 || OFF_SLAB(s->cs_cachep))
- -                              continue;
- -                      lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
- -                      alc = l3->alien;
- -                      /*
- -                       * FIXME: This check for BAD_ALIEN_MAGIC
- -                       * should go away when common slab code is taught to
- -                       * work even without alien caches.
- -                       * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- -                       * for alloc_alien_cache,
- -                       */
- -                      if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- -                              continue;
- -                      for_each_node(r) {
- -                              if (alc[r])
- -                                      lockdep_set_class(&alc[r]->lock,
- -                                           &on_slab_alc_key);
- -                      }
+ +      if (g_cpucache_up != FULL)
+ +              return;
+ +
+ +      for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
+ +              struct array_cache **alc;
+ +              struct kmem_list3 *l3;
+ +              int r;
+ +
+ +              l3 = s->cs_cachep->nodelists[q];
+ +              if (!l3 || OFF_SLAB(s->cs_cachep))
+ +                      return;
+ +              lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+ +              alc = l3->alien;
+ +              /*
+ +               * FIXME: This check for BAD_ALIEN_MAGIC
+ +               * should go away when common slab code is taught to
+ +               * work even without alien caches.
+ +               * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+ +               * for alloc_alien_cache,
+ +               */
+ +              if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+ +                      return;
+ +              for_each_node(r) {
+ +                      if (alc[r])
+ +                              lockdep_set_class(&alc[r]->lock,
+ +                                      &on_slab_alc_key);
                 }
- -              s++;
         }
   }
+ +
+ +static inline void init_lock_keys(void)
+ +{
+ +      int node;
+ +
+ +      for_each_node(node)
+ +              init_node_lock_keys(node);
+ +}
   #else
+ +static void init_node_lock_keys(int q)
+ +{
+ +}
+ +
   static inline void init_lock_keys(void)
   {
   }
@@@ -697,7 -665,27 +697,7 @@@
   static DEFINE_MUTEX(cache_chain_mutex);
   static struct list_head cache_chain;
   
- static DEFINE_PER_CPU(struct delayed_work, reap_work);
- -/*
- - * chicken and egg problem: delay the per-cpu array allocation
- - * until the general caches are up.
- - */
- -static enum {
- -      NONE,
- -      PARTIAL_AC,
- -      PARTIAL_L3,
- -      EARLY,
- -      FULL
- -} g_cpucache_up;
- -
- -/*
- - * used by boot code to determine if it can use slab based allocator
- - */
- -int slab_is_available(void)
- -{
- -      return g_cpucache_up >= EARLY;
- -}
- -
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
   
   static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
   {
@@@ -838,7 -826,7 +838,7 @@@ __setup("noaliencache", noaliencache_se
    * objects freed on different nodes from which they were allocated) and the
    * flushing of remote pcps by calling drain_node_pages.
    */
- static DEFINE_PER_CPU(unsigned long, reap_node);
+ static DEFINE_PER_CPU(unsigned long, slab_reap_node);
   
   static void init_reap_node(int cpu)
   {
@@@ -848,17 -836,17 +848,17 @@@
         if (node == MAX_NUMNODES)
                 node = first_node(node_online_map);
   
-       per_cpu(reap_node, cpu) = node;
+       per_cpu(slab_reap_node, cpu) = node;
   }
   
   static void next_reap_node(void)
   {
-       int node = __get_cpu_var(reap_node);
+       int node = __get_cpu_var(slab_reap_node);
   
         node = next_node(node, node_online_map);
         if (unlikely(node >= MAX_NUMNODES))
                 node = first_node(node_online_map);
-       __get_cpu_var(reap_node) = node;
+       __get_cpu_var(slab_reap_node) = node;
   }
   
   #else
@@@ -875,7 -863,7 +875,7 @@@
    */
   static void __cpuinit start_cpu_timer(int cpu)
   {
-       struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
+       struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
   
         /*
          * When this gets called from do_initcalls via cpucache_init(),
@@@ -1039,7 -1027,7 +1039,7 @@@ static void __drain_alien_cache(struct 
    */
   static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
   {
-       int node = __get_cpu_var(reap_node);
+       int node = __get_cpu_var(slab_reap_node);
   
         if (l3->alien) {
                 struct array_cache *ac = l3->alien[node];
@@@ -1266,8 -1254,6 +1266,8 @@@ static int __cpuinit cpuup_prepare(lon
                 kfree(shared);
                 free_alien_cache(alien);
         }
+ +      init_node_lock_keys(node);
+ +
         return 0;
   bad:
         cpuup_canceled(cpu);
@@@ -1300,9 -1286,9 +1300,9 @@@ static int __cpuinit cpuup_callback(str
                  * anything expensive but will only modify reap_work
                  * and reschedule the timer.
                 */
-               cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
+               cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
                 /* Now the cache_reaper is guaranteed to be not running. */
-               per_cpu(reap_work, cpu).work.func = NULL;
+               per_cpu(slab_reap_work, cpu).work.func = NULL;
                 break;
         case CPU_DOWN_FAILED:
         case CPU_DOWN_FAILED_FROZEN:
@@@ -3117,19 -3103,13 +3117,19 @@@ static inline void *____cache_alloc(str
         } else {
                 STATS_INC_ALLOCMISS(cachep);
                 objp = cache_alloc_refill(cachep, flags);
+ +              /*
+ +               * the 'ac' may be updated by cache_alloc_refill(),
+ +               * and kmemleak_erase() requires its correct value.
+ +               */
+ +              ac = cpu_cache_get(cachep);
         }
         /*
          * To avoid a false negative, if an object that is in one of the
          * per-CPU caches is leaked, we need to make sure kmemleak doesn't
          * treat the array pointers as a reference to the object.
          */
- -      kmemleak_erase(&ac->entry[ac->avail]);
+ +      if (objp)
+ +              kmemleak_erase(&ac->entry[ac->avail]);
         return objp;
   }
   
@@@ -3326,7 -3306,7 +3326,7 @@@ __cache_alloc_node(struct kmem_cache *c
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
   
- -      if (unlikely(nodeid == -1))
+ +      if (nodeid == -1)
                 nodeid = numa_node_id();
   
         if (unlikely(!cachep->nodelists[nodeid])) {
diff --combined mm/vmalloc.c

index 0f551a4a44cddc7a042d47bbcd85c7126569ee69,b65cfe44a5629e25f33dfcb7de95bcbbbda27b3a..9b08d790df6fe441a2b8c2002ed9f0e1a4677641
--- 1/mm/vmalloc.c
--- 2/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@@ -12,7 -12,6 +12,7 @@@
   #include <linux/mm.h>
   #include <linux/module.h>
   #include <linux/highmem.h>
+ +#include <linux/sched.h>
   #include <linux/slab.h>
   #include <linux/spinlock.h>
   #include <linux/interrupt.h>
@@@ -26,10 -25,10 +26,10 @@@
   #include <linux/rcupdate.h>
   #include <linux/pfn.h>
   #include <linux/kmemleak.h>
- -#include <linux/highmem.h>
   #include <asm/atomic.h>
   #include <asm/uaccess.h>
   #include <asm/tlbflush.h>
+ +#include <asm/shmparam.h>
   
   
   /*** Page table manipulation functions ***/
@@@ -761,7 -760,7 +761,7 @@@ static struct vmap_block *new_vmap_bloc
         spin_lock(&vbq->lock);
         list_add(&vb->free_list, &vbq->free);
         spin_unlock(&vbq->lock);
-       put_cpu_var(vmap_cpu_blocks);
+       put_cpu_var(vmap_block_queue);
   
         return vb;
   }
@@@ -826,7 -825,7 +826,7 @@@ again
                 }
                 spin_unlock(&vb->lock);
         }
-       put_cpu_var(vmap_cpu_blocks);
+       put_cpu_var(vmap_block_queue);
         rcu_read_unlock();
   
         if (!addr) {
@@@ -1157,11 -1156,12 +1157,11 @@@ static void insert_vmalloc_vm(struct vm
   }
   
   static struct vm_struct *__get_vm_area_node(unsigned long size,
- -              unsigned long flags, unsigned long start, unsigned long end,
- -              int node, gfp_t gfp_mask, void *caller)
+ +              unsigned long align, unsigned long flags, unsigned long start,
+ +              unsigned long end, int node, gfp_t gfp_mask, void *caller)
   {
         static struct vmap_area *va;
         struct vm_struct *area;
- -      unsigned long align = 1;
   
         BUG_ON(in_interrupt());
         if (flags & VM_IOREMAP) {
@@@ -1201,7 -1201,7 +1201,7 @@@
   struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
                                 unsigned long start, unsigned long end)
   {
- -      return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+ +      return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
                                                 __builtin_return_address(0));
   }
   EXPORT_SYMBOL_GPL(__get_vm_area);
@@@ -1210,7 -1210,7 +1210,7 @@@ struct vm_struct *__get_vm_area_caller(
                                        unsigned long start, unsigned long end,
                                        void *caller)
   {
- -      return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+ +      return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
                                   caller);
   }
   
@@@ -1225,22 -1225,22 +1225,22 @@@
    */
   struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
   {
- -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
+ +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
                                 -1, GFP_KERNEL, __builtin_return_address(0));
   }
   
   struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
                                 void *caller)
   {
- -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
+ +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
                                                 -1, GFP_KERNEL, caller);
   }
   
   struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
                                    int node, gfp_t gfp_mask)
   {
- -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
- -                                gfp_mask, __builtin_return_address(0));
+ +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+ +                                node, gfp_mask, __builtin_return_address(0));
   }
   
   static struct vm_struct *find_vm_area(const void *addr)
@@@ -1403,8 -1403,7 +1403,8 @@@ void *vmap(struct page **pages, unsigne
   }
   EXPORT_SYMBOL(vmap);
   
- -static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+ +static void *__vmalloc_node(unsigned long size, unsigned long align,
+ +                          gfp_t gfp_mask, pgprot_t prot,
                             int node, void *caller);
   static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                  pgprot_t prot, int node, void *caller)
@@@ -1418,7 -1417,7 +1418,7 @@@
         area->nr_pages = nr_pages;
         /* Please note that the recursion is strictly bounded. */
         if (array_size > PAGE_SIZE) {
- -              pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
+ +              pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
                                 PAGE_KERNEL, node, caller);
                 area->flags |= VM_VPAGES;
         } else {
@@@ -1477,7 -1476,6 +1477,7 @@@ void *__vmalloc_area(struct vm_struct *
   /**
    *    __vmalloc_node  -  allocate virtually contiguous memory
    *    @size:          allocation size
+ + *    @align:         desired alignment
    *    @gfp_mask:      flags for the page level allocator
    *    @prot:          protection mask for the allocated pages
    *    @node:          node to use for allocation or -1
@@@ -1487,9 -1485,8 +1487,9 @@@
    *    allocator with @gfp_mask flags.  Map them into contiguous
    *    kernel virtual space, using a pagetable protection of @prot.
    */
- -static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
- -                                              int node, void *caller)
+ +static void *__vmalloc_node(unsigned long size, unsigned long align,
+ +                          gfp_t gfp_mask, pgprot_t prot,
+ +                          int node, void *caller)
   {
         struct vm_struct *area;
         void *addr;
@@@ -1499,8 -1496,8 +1499,8 @@@
         if (!size || (size >> PAGE_SHIFT) > totalram_pages)
                 return NULL;
   
- -      area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
- -                                              node, gfp_mask, caller);
+ +      area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
+ +                                VMALLOC_END, node, gfp_mask, caller);
   
         if (!area)
                 return NULL;
@@@ -1519,7 -1516,7 +1519,7 @@@
   
   void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
   {
- -      return __vmalloc_node(size, gfp_mask, prot, -1,
+ +      return __vmalloc_node(size, 1, gfp_mask, prot, -1,
                                 __builtin_return_address(0));
   }
   EXPORT_SYMBOL(__vmalloc);
@@@ -1535,7 -1532,7 +1535,7 @@@
    */
   void *vmalloc(unsigned long size)
   {
- -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
+ +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
                                         -1, __builtin_return_address(0));
   }
   EXPORT_SYMBOL(vmalloc);
@@@ -1552,8 -1549,7 +1552,8 @@@ void *vmalloc_user(unsigned long size
         struct vm_struct *area;
         void *ret;
   
- -      ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ +      ret = __vmalloc_node(size, SHMLBA,
+ +                           GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                              PAGE_KERNEL, -1, __builtin_return_address(0));
         if (ret) {
                 area = find_vm_area(ret);
@@@ -1576,7 -1572,7 +1576,7 @@@ EXPORT_SYMBOL(vmalloc_user)
    */
   void *vmalloc_node(unsigned long size, int node)
   {
- -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
+ +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
                                         node, __builtin_return_address(0));
   }
   EXPORT_SYMBOL(vmalloc_node);
@@@ -1599,7 -1595,7 +1599,7 @@@
   
   void *vmalloc_exec(unsigned long size)
   {
- -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
+ +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
                               -1, __builtin_return_address(0));
   }
   
@@@ -1620,7 -1616,7 +1620,7 @@@
    */
   void *vmalloc_32(unsigned long size)
   {
- -      return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
+ +      return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
                               -1, __builtin_return_address(0));
   }
   EXPORT_SYMBOL(vmalloc_32);
@@@ -1637,7 -1633,7 +1637,7 @@@ void *vmalloc_32_user(unsigned long siz
         struct vm_struct *area;
         void *ret;
   
- -      ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
+ +      ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
                              -1, __builtin_return_address(0));
         if (ret) {
                 area = find_vm_area(ret);
author	Linus Torvalds <[email protected]>
	Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
committer	Linus Torvalds <[email protected]>
	Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
		1	2
arch/powerpc/kernel/perf_callchain.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/setup-common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/platforms/cell/interrupt.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic/nmi.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel_cacheinfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
crypto/cryptd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/base/cpu.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/cpufreq/cpufreq.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/crypto/padlock-aes.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma/dmaengine.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/loopback.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/veth.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/s390/net/netiucv.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/mballoc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_mount.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/neighbour.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/netfilter/nf_conntrack.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/lockdep.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcutorture.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/softirq.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_functions_graph.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_hw_branches.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/slab.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmalloc.c	patch \|	diff1 \|	diff2 \|	blob \| history