Git Repo - linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
author	Linus Torvalds <[email protected]>
	Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
committer	Linus Torvalds <[email protected]>
	Mon, 14 Dec 2009 17:58:24 +0000 (09:58 -0800)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (34 commits)
  m68k: rename global variable vmalloc_end to m68k_vmalloc_end
  percpu: add missing per_cpu_ptr_to_phys() definition for UP
  percpu: Fix kdump failure if booted with percpu_alloc=page
  percpu: make misc percpu symbols unique
  percpu: make percpu symbols in ia64 unique
  percpu: make percpu symbols in powerpc unique
  percpu: make percpu symbols in x86 unique
  percpu: make percpu symbols in xen unique
  percpu: make percpu symbols in cpufreq unique
  percpu: make percpu symbols in oprofile unique
  percpu: make percpu symbols in tracer unique
  percpu: make percpu symbols under kernel/ and mm/ unique
  percpu: remove some sparse warnings
  percpu: make alloc_percpu() handle array types
  vmalloc: fix use of non-existent percpu variable in put_cpu_var()
  this_cpu: Use this_cpu_xx in trace_functions_graph.c
  this_cpu: Use this_cpu_xx for ftrace
  this_cpu: Use this_cpu_xx in nmi handling
  this_cpu: Use this_cpu operations in RCU
  this_cpu: Use this_cpu ops for VM statistics
  ...
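
The this_cpu conversions listed above replace local_t counters and explicit
__get_cpu_var() address arithmetic with this_cpu operations. A minimal sketch
of the pattern, reusing the alert_counter names from the
arch/x86/kernel/apic/nmi.c hunk further down (the helper function itself is
hypothetical):

	/* Before: a local_t counter addressed through __get_cpu_var():
	 *
	 *	static DEFINE_PER_CPU(local_t, alert_counter);
	 *	local_inc(&__get_cpu_var(alert_counter));
	 *	if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
	 *		...
	 *
	 * After: a plain scalar plus this_cpu ops, which lets the arch fold
	 * the per-CPU offset into a single instruction (a segment-relative
	 * add on x86) instead of computing the variable's address first.
	 */
	static DEFINE_PER_CPU(long, alert_counter);

	/* Hypothetical helper: count an NMI tick and reset the counter at the
	 * 5-second mark (the real nmi_watchdog_tick() calls die_nmi() there).
	 */
	static void alert_counter_tick(unsigned int nmi_hz)
	{
		__this_cpu_inc(per_cpu_var(alert_counter));
		if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
			__this_cpu_write(per_cpu_var(alert_counter), 0);
	}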

Fix up trivial (famous last words) global per-cpu naming conflicts in
arch/x86/kvm/svm.c
mm/slab.c

34 files changed:
arch/powerpc/kernel/perf_callchain.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/smp.c
arch/powerpc/platforms/cell/interrupt.c
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kvm/svm.c
arch/x86/xen/smp.c
arch/x86/xen/time.c
crypto/cryptd.c
drivers/base/cpu.c
drivers/cpufreq/cpufreq.c
drivers/crypto/padlock-aes.c
drivers/dma/dmaengine.c
drivers/net/loopback.c
drivers/net/veth.c
drivers/s390/net/netiucv.c
fs/ext4/mballoc.c
fs/xfs/xfs_mount.c
include/net/neighbour.h
include/net/netfilter/nf_conntrack.h
kernel/lockdep.c
kernel/module.c
kernel/rcutorture.c
kernel/sched.c
kernel/softirq.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_functions_graph.c
kernel/trace/trace_hw_branches.c
mm/slab.c
mm/vmalloc.c

index 936f04dbfc6f00bd1803a643753520b34e1ae8b1,fe59c44f9b5be5577c8fa69d010074616f7889b5..a3c11cac3d7154d1381d77a83ec513e5c6601c11
@@@ -119,6 -119,13 +119,6 @@@ static void perf_callchain_kernel(struc
  }
  
  #ifdef CONFIG_PPC64
 -
 -#ifdef CONFIG_HUGETLB_PAGE
 -#define is_huge_psize(pagesize)       (HPAGE_SHIFT && mmu_huge_psizes[pagesize])
 -#else
 -#define is_huge_psize(pagesize)       0
 -#endif
 -
  /*
   * On 64-bit we don't want to invoke hash_page on user addresses from
   * interrupt context, so if the access faults, we read the page tables
@@@ -128,7 -135,7 +128,7 @@@ static int read_user_stack_slow(void __
  {
        pgd_t *pgdir;
        pte_t *ptep, pte;
 -      int pagesize;
 +      unsigned shift;
        unsigned long addr = (unsigned long) ptr;
        unsigned long offset;
        unsigned long pfn;
        if (!pgdir)
                return -EFAULT;
  
 -      pagesize = get_slice_psize(current->mm, addr);
 +      ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
 +      if (!shift)
 +              shift = PAGE_SHIFT;
  
        /* align address to page boundary */
 -      offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
 +      offset = addr & ((1UL << shift) - 1);
        addr -= offset;
  
 -      if (is_huge_psize(pagesize))
 -              ptep = huge_pte_offset(current->mm, addr);
 -      else
 -              ptep = find_linux_pte(pgdir, addr);
 -
        if (ptep == NULL)
                return -EFAULT;
        pte = *ptep;
@@@ -487,11 -497,11 +487,11 @@@ static void perf_callchain_user_32(stru
   * Since we can't get PMU interrupts inside a PMU interrupt handler,
   * we don't need separate irq and nmi entries here.
   */
- static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+ static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
  
  struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
  {
-       struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+       struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
  
        entry->nr = 0;
  
index 845c72ab7357884c580a1c6f3f3217ed4987ee75,aa5aeb947bc5754a9431dd52eb30d5d7e0390985..03dd6a248198c3247af741db5f154452b2c84f2c
@@@ -157,7 -157,7 +157,7 @@@ extern u32 cpu_temp_both(unsigned long 
  #endif /* CONFIG_TAU */
  
  #ifdef CONFIG_SMP
- DEFINE_PER_CPU(unsigned int, pvr);
+ DEFINE_PER_CPU(unsigned int, cpu_pvr);
  #endif
  
  static int show_cpuinfo(struct seq_file *m, void *v)
        }
  
  #ifdef CONFIG_SMP
-       pvr = per_cpu(pvr, cpu_id);
+       pvr = per_cpu(cpu_pvr, cpu_id);
  #else
        pvr = mfspr(SPRN_PVR);
  #endif
@@@ -660,7 -660,6 +660,7 @@@ late_initcall(check_cache_coherency)
  
  #ifdef CONFIG_DEBUG_FS
  struct dentry *powerpc_debugfs_root;
 +EXPORT_SYMBOL(powerpc_debugfs_root);
  
  static int powerpc_debugfs_init(void)
  {
index 97196eefef3edccba66bfd7398b9d75962e1d5c4,2ebb48410976c9801917f9b40278163ed5ccc445..a521fb8a40ee2fcb206397cf490aab9550cf9c96
@@@ -218,9 -218,6 +218,9 @@@ void crash_send_ipi(void (*crash_ipi_ca
  
  static void stop_this_cpu(void *dummy)
  {
 +      /* Remove this CPU */
 +      set_cpu_online(smp_processor_id(), false);
 +
        local_irq_disable();
        while (1)
                ;
@@@ -235,7 -232,7 +235,7 @@@ struct thread_info *current_set[NR_CPUS
  
  static void __devinit smp_store_cpu_info(int id)
  {
-       per_cpu(pvr, id) = mfspr(SPRN_PVR);
+       per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
  }
  
  static void __init smp_create_idle(unsigned int cpu)
index f9dbf76a763f74fea41d2af522a23123f6880250,54bad901e4c9870c8c944d3bd3f981b5957ae60f..7267effc8078b53265e43898a7551f6adf2523d8
@@@ -54,7 -54,7 +54,7 @@@ struct iic 
        struct device_node *node;
  };
  
- static DEFINE_PER_CPU(struct iic, iic);
+ static DEFINE_PER_CPU(struct iic, cpu_iic);
  #define IIC_NODE_COUNT        2
  static struct irq_host *iic_host;
  
@@@ -82,13 -82,13 +82,13 @@@ static void iic_unmask(unsigned int irq
  
  static void iic_eoi(unsigned int irq)
  {
-       struct iic *iic = &__get_cpu_var(iic);
+       struct iic *iic = &__get_cpu_var(cpu_iic);
        out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
        BUG_ON(iic->eoi_ptr < 0);
  }
  
  static struct irq_chip iic_chip = {
 -      .typename = " CELL-IIC ",
 +      .name = " CELL-IIC ",
        .mask = iic_mask,
        .unmask = iic_unmask,
        .eoi = iic_eoi,
@@@ -133,7 -133,7 +133,7 @@@ static void iic_ioexc_cascade(unsigned 
  
  
  static struct irq_chip iic_ioexc_chip = {
 -      .typename = " CELL-IOEX",
 +      .name = " CELL-IOEX",
        .mask = iic_mask,
        .unmask = iic_unmask,
        .eoi = iic_ioexc_eoi,
@@@ -146,7 -146,7 +146,7 @@@ static unsigned int iic_get_irq(void
        struct iic *iic;
        unsigned int virq;
  
-       iic = &__get_cpu_var(iic);
+       iic = &__get_cpu_var(cpu_iic);
        *(unsigned long *) &pending =
                in_be64((u64 __iomem *) &iic->regs->pending_destr);
        if (!(pending.flags & CBE_IIC_IRQ_VALID))
  
  void iic_setup_cpu(void)
  {
-       out_be64(&__get_cpu_var(iic).regs->prio, 0xff);
+       out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff);
  }
  
  u8 iic_get_target_id(int cpu)
  {
-       return per_cpu(iic, cpu).target_id;
+       return per_cpu(cpu_iic, cpu).target_id;
  }
  
  EXPORT_SYMBOL_GPL(iic_get_target_id);
@@@ -181,7 -181,7 +181,7 @@@ static inline int iic_ipi_to_irq(int ip
  
  void iic_cause_IPI(int cpu, int mesg)
  {
-       out_be64(&per_cpu(iic, cpu).regs->generate, (0xf - mesg) << 4);
+       out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - mesg) << 4);
  }
  
  struct irq_host *iic_get_irq_host(int node)
@@@ -297,7 -297,7 +297,7 @@@ static int iic_host_map(struct irq_hos
  }
  
  static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
 -                         u32 *intspec, unsigned int intsize,
 +                         const u32 *intspec, unsigned int intsize,
                           irq_hw_number_t *out_hwirq, unsigned int *out_flags)
  
  {
@@@ -348,7 -348,7 +348,7 @@@ static void __init init_one_iic(unsigne
        /* XXX FIXME: should locate the linux CPU number from the HW cpu
         * number properly. We are lucky for now
         */
-       struct iic *iic = &per_cpu(iic, hw_cpu);
+       struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
  
        iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
        BUG_ON(iic->regs == NULL);
index 6389432a9dbf7f07a0dd08b4e67857c6ec899d6d,e631cc4416f7872826a79d84078181b8917ce08b..0159a69396cba449a424190459a02d83a3f417d8
@@@ -39,8 -39,7 +39,8 @@@
  int unknown_nmi_panic;
  int nmi_watchdog_enabled;
  
 -static cpumask_t backtrace_mask __read_mostly;
 +/* For reliability, we're prepared to waste bits here. */
 +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
  
  /* nmi_active:
   * >0: the lapic NMI watchdog is active, but can be disabled
@@@ -361,7 -360,7 +361,7 @@@ void stop_apic_nmi_watchdog(void *unuse
   */
  
  static DEFINE_PER_CPU(unsigned, last_irq_sum);
- static DEFINE_PER_CPU(local_t, alert_counter);
+ static DEFINE_PER_CPU(long, alert_counter);
  static DEFINE_PER_CPU(int, nmi_touch);
  
  void touch_nmi_watchdog(void)
@@@ -415,7 -414,7 +415,7 @@@ nmi_watchdog_tick(struct pt_regs *regs
        }
  
        /* We can be called before check_nmi_watchdog, hence NULL check. */
 -      if (cpumask_test_cpu(cpu, &backtrace_mask)) {
 +      if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
  
                spin_lock(&lock);
                show_regs(regs);
                dump_stack();
                spin_unlock(&lock);
 -              cpumask_clear_cpu(cpu, &backtrace_mask);
 +              cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
  
                rc = 1;
        }
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
-               local_inc(&__get_cpu_var(alert_counter));
-               if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+               __this_cpu_inc(per_cpu_var(alert_counter));
+               if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
                        /*
                         * die_nmi will return ONLY if NOTIFY_STOP happens..
                         */
                                regs, panic_on_timeout);
        } else {
                __get_cpu_var(last_irq_sum) = sum;
-               local_set(&__get_cpu_var(alert_counter), 0);
+               __this_cpu_write(per_cpu_var(alert_counter), 0);
        }
  
        /* see if the nmi watchdog went off */
@@@ -559,14 -558,14 +559,14 @@@ void arch_trigger_all_cpu_backtrace(voi
  {
        int i;
  
 -      cpumask_copy(&backtrace_mask, cpu_online_mask);
 +      cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
  
        printk(KERN_INFO "sending NMI to all CPUs:\n");
        apic->send_IPI_all(NMI_VECTOR);
  
        /* Wait for up to 10 seconds for all CPUs to do the backtrace */
        for (i = 0; i < 10 * 1000; i++) {
 -              if (cpumask_empty(&backtrace_mask))
 +              if (cpumask_empty(to_cpumask(backtrace_mask)))
                        break;
                mdelay(1);
        }
index c1afa990a6c84bf49229a9c50052b063b9731236,3192f22f2fddc9335c3d987fe83ffdb0ab0c3c99..20399b7b0c3f1a4e3a3c9d5e66a92d952d93f028
@@@ -61,7 -61,7 +61,7 @@@ void __init setup_cpu_local_masks(void
  static void __cpuinit default_init(struct cpuinfo_x86 *c)
  {
  #ifdef CONFIG_X86_64
 -      display_cacheinfo(c);
 +      cpu_detect_cache_sizes(c);
  #else
        /* Not much we can do here... */
        /* Check if at least it has cpuid */
@@@ -383,7 -383,7 +383,7 @@@ static void __cpuinit get_model_name(st
        }
  }
  
 -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
  {
        unsigned int n, dummy, ebx, ecx, edx, l2size;
  
  
        if (n >= 0x80000005) {
                cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
 -              printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
 -                              edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
                c->x86_cache_size = (ecx>>24) + (edx>>24);
  #ifdef CONFIG_X86_64
                /* On K8 L1 TLB is inclusive, so don't count it */
  #endif
  
        c->x86_cache_size = l2size;
 -
 -      printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 -                      l2size, ecx & 0xFF);
  }
  
  void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@@ -654,31 -659,24 +654,31 @@@ void __init early_cpu_init(void
        const struct cpu_dev *const *cdev;
        int count = 0;
  
 +#ifdef PROCESSOR_SELECT
        printk(KERN_INFO "KERNEL supported cpus:\n");
 +#endif
 +
        for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
                const struct cpu_dev *cpudev = *cdev;
 -              unsigned int j;
  
                if (count >= X86_VENDOR_NUM)
                        break;
                cpu_devs[count] = cpudev;
                count++;
  
 -              for (j = 0; j < 2; j++) {
 -                      if (!cpudev->c_ident[j])
 -                              continue;
 -                      printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
 -                              cpudev->c_ident[j]);
 +#ifdef PROCESSOR_SELECT
 +              {
 +                      unsigned int j;
 +
 +                      for (j = 0; j < 2; j++) {
 +                              if (!cpudev->c_ident[j])
 +                                      continue;
 +                              printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
 +                                      cpudev->c_ident[j]);
 +                      }
                }
 +#endif
        }
 -
        early_identify_cpu(&boot_cpu_data);
  }
  
@@@ -839,8 -837,10 +839,8 @@@ static void __cpuinit identify_cpu(stru
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }
  
 -#ifdef CONFIG_X86_MCE
        /* Init Machine Check Exception if available. */
 -      mcheck_init(c);
 -#endif
 +      mcheck_cpu_init(c);
  
        select_idle_routine(c);
  
@@@ -1093,7 -1093,7 +1093,7 @@@ static void clear_all_debug_regs(void
  
  void __cpuinit cpu_init(void)
  {
-       struct orig_ist *orig_ist;
+       struct orig_ist *oist;
        struct task_struct *me;
        struct tss_struct *t;
        unsigned long v;
  
        cpu = stack_smp_processor_id();
        t = &per_cpu(init_tss, cpu);
-       orig_ist = &per_cpu(orig_ist, cpu);
+       oist = &per_cpu(orig_ist, cpu);
  
  #ifdef CONFIG_NUMA
        if (cpu != 0 && percpu_read(node_number) == 0 &&
        wrmsrl(MSR_KERNEL_GS_BASE, 0);
        barrier();
  
 -      check_efer();
 +      x86_configure_nx();
        if (cpu != 0)
                enable_x2apic();
  
        /*
         * set up and load the per-CPU TSS
         */
-       if (!orig_ist->ist[0]) {
+       if (!oist->ist[0]) {
                char *estacks = per_cpu(exception_stacks, cpu);
  
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
-                       orig_ist->ist[v] = t->x86_tss.ist[v] =
+                       oist->ist[v] = t->x86_tss.ist[v] =
                                        (unsigned long)estacks;
                }
        }
index d2e7c77c1ea4901a8e3e6b2a70c99b5c991132f3,43eb3465dda73bb7314ca3c45f2f007c7e4d28cf..f28decf8dde3990626f493e5d962bfea9d48e59f
@@@ -68,9 -68,9 +68,9 @@@ struct acpi_cpufreq_data 
        unsigned int cpu_feature;
  };
  
- static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
  
- static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+ static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
  
  /* acpi_perf_data is a pointer to percpu data. */
  static struct acpi_processor_performance *acpi_perf_data;
@@@ -214,14 -214,14 +214,14 @@@ static u32 get_cur_val(const struct cpu
        if (unlikely(cpumask_empty(mask)))
                return 0;
  
-       switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+       switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
        case SYSTEM_INTEL_MSR_CAPABLE:
                cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
                cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
                break;
        case SYSTEM_IO_CAPABLE:
                cmd.type = SYSTEM_IO_CAPABLE;
-               perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+               perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
                cmd.addr.io.port = perf->control_register.address;
                cmd.addr.io.bit_width = perf->control_register.bit_width;
                break;
@@@ -268,8 -268,8 +268,8 @@@ static unsigned int get_measured_perf(s
        if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
                return 0;
  
-       ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
-       per_cpu(old_perf, cpu) = perf;
+       ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+       per_cpu(acfreq_old_perf, cpu) = perf;
  
        retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
  
  
  static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
  {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
        unsigned int freq;
        unsigned int cached_freq;
  
@@@ -322,7 -322,7 +322,7 @@@ static unsigned int check_freqs(const s
  static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                               unsigned int target_freq, unsigned int relation)
  {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
        struct acpi_processor_performance *perf;
        struct cpufreq_freqs freqs;
        struct drv_cmd cmd;
@@@ -416,7 -416,7 +416,7 @@@ out
  
  static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
  {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
  
        dprintk("acpi_cpufreq_verify\n");
  
@@@ -526,21 -526,15 +526,21 @@@ static const struct dmi_system_id sw_an
  
  static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
  {
 -      /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
 +      /* Intel Xeon Processor 7100 Series Specification Update
 +       * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
         * AL30: A Machine Check Exception (MCE) Occurring during an
         * Enhanced Intel SpeedStep Technology Ratio Change May Cause
 -       * Both Processor Cores to Lock Up when HT is enabled*/
 +       * Both Processor Cores to Lock Up*/
        if (c->x86_vendor == X86_VENDOR_INTEL) {
                if ((c->x86 == 15) &&
                    (c->x86_model == 6) &&
 -                  (c->x86_mask == 8) && smt_capable())
 +                  (c->x86_mask == 8)) {
 +                      printk(KERN_INFO "acpi-cpufreq: Intel(R) "
 +                          "Xeon(R) 7100 Errata AL30, processors may "
 +                          "lock up on frequency changes: disabling "
 +                          "acpi-cpufreq.\n");
                        return -ENODEV;
 +                  }
                }
        return 0;
  }
@@@ -555,18 -549,13 +555,18 @@@ static int acpi_cpufreq_cpu_init(struc
        unsigned int result = 0;
        struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
        struct acpi_processor_performance *perf;
 +#ifdef CONFIG_SMP
 +      static int blacklisted;
 +#endif
  
        dprintk("acpi_cpufreq_cpu_init\n");
  
  #ifdef CONFIG_SMP
 -      result = acpi_cpufreq_blacklist(c);
 -      if (result)
 -              return result;
 +      if (blacklisted)
 +              return blacklisted;
 +      blacklisted = acpi_cpufreq_blacklist(c);
 +      if (blacklisted)
 +              return blacklisted;
  #endif
  
        data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
                return -ENOMEM;
  
        data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-       per_cpu(drv_data, cpu) = data;
+       per_cpu(acfreq_data, cpu) = data;
  
        if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
                acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@@ -725,20 -714,20 +725,20 @@@ err_unreg
        acpi_processor_unregister_performance(perf, cpu);
  err_free:
        kfree(data);
-       per_cpu(drv_data, cpu) = NULL;
+       per_cpu(acfreq_data, cpu) = NULL;
  
        return result;
  }
  
  static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
  {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
  
        dprintk("acpi_cpufreq_cpu_exit\n");
  
        if (data) {
                cpufreq_frequency_table_put_attr(policy->cpu);
-               per_cpu(drv_data, policy->cpu) = NULL;
+               per_cpu(acfreq_data, policy->cpu) = NULL;
                acpi_processor_unregister_performance(data->acpi_data,
                                                      policy->cpu);
                kfree(data);
  
  static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
  {
-       struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+       struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
  
        dprintk("acpi_cpufreq_resume\n");
  
@@@ -764,15 -753,14 +764,15 @@@ static struct freq_attr *acpi_cpufreq_a
  };
  
  static struct cpufreq_driver acpi_cpufreq_driver = {
 -      .verify = acpi_cpufreq_verify,
 -      .target = acpi_cpufreq_target,
 -      .init = acpi_cpufreq_cpu_init,
 -      .exit = acpi_cpufreq_cpu_exit,
 -      .resume = acpi_cpufreq_resume,
 -      .name = "acpi-cpufreq",
 -      .owner = THIS_MODULE,
 -      .attr = acpi_cpufreq_attr,
 +      .verify         = acpi_cpufreq_verify,
 +      .target         = acpi_cpufreq_target,
 +      .bios_limit     = acpi_processor_get_bios_limit,
 +      .init           = acpi_cpufreq_cpu_init,
 +      .exit           = acpi_cpufreq_cpu_exit,
 +      .resume         = acpi_cpufreq_resume,
 +      .name           = "acpi-cpufreq",
 +      .owner          = THIS_MODULE,
 +      .attr           = acpi_cpufreq_attr,
  };
  
  static int __init acpi_cpufreq_init(void)
index 6c40f6b5b340b031192232bdc946d62c7076d0cc,f5ccb4fa5a5d7d00ed1a4a8dce0db5a7a1cabd44..0c06bca2a1dcc1dc68003aa0bdde3935b254967d
@@@ -94,7 -94,7 +94,7 @@@ static const struct _cache_table __cpui
        { 0xd1, LVL_3,    1024 },       /* 4-way set assoc, 64 byte line size */
        { 0xd2, LVL_3,    2048 },       /* 4-way set assoc, 64 byte line size */
        { 0xd6, LVL_3,    1024 },       /* 8-way set assoc, 64 byte line size */
 -      { 0xd7, LVL_3,    2038 },       /* 8-way set assoc, 64 byte line size */
 +      { 0xd7, LVL_3,    2048 },       /* 8-way set assoc, 64 byte line size */
        { 0xd8, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
        { 0xdc, LVL_3,    2048 },       /* 12-way set assoc, 64 byte line size */
        { 0xdd, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
        { 0xe2, LVL_3,    2048 },       /* 16-way set assoc, 64 byte line size */
        { 0xe3, LVL_3,    4096 },       /* 16-way set assoc, 64 byte line size */
        { 0xe4, LVL_3,    8192 },       /* 16-way set assoc, 64 byte line size */
 +      { 0xea, LVL_3,    12288 },      /* 24-way set assoc, 64 byte line size */
 +      { 0xeb, LVL_3,    18432 },      /* 24-way set assoc, 64 byte line size */
 +      { 0xec, LVL_3,    24576 },      /* 24-way set assoc, 64 byte line size */
        { 0x00, 0, 0}
  };
  
@@@ -491,6 -488,22 +491,6 @@@ unsigned int __cpuinit init_intel_cache
  #endif
        }
  
 -      if (trace)
 -              printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
 -      else if (l1i)
 -              printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
 -
 -      if (l1d)
 -              printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
 -      else
 -              printk(KERN_CONT "\n");
 -
 -      if (l2)
 -              printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
 -
 -      if (l3)
 -              printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
 -
        c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
  
        return l2;
  #ifdef CONFIG_SYSFS
  
  /* pointer to _cpuid4_info array (for each cache leaf) */
- static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
- #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
+ #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
  
  #ifdef CONFIG_SMP
  static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
        if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
                struct cpuinfo_x86 *d;
                for_each_online_cpu(i) {
-                       if (!per_cpu(cpuid4_info, i))
+                       if (!per_cpu(ici_cpuid4_info, i))
                                continue;
                        d = &cpu_data(i);
                        this_leaf = CPUID4_INFO_IDX(i, index);
                            c->apicid >> index_msb) {
                                cpumask_set_cpu(i,
                                        to_cpumask(this_leaf->shared_cpu_map));
-                               if (i != cpu && per_cpu(cpuid4_info, i))  {
+                               if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
                                        sibling_leaf =
                                                CPUID4_INFO_IDX(i, index);
                                        cpumask_set_cpu(cpu, to_cpumask(
@@@ -574,8 -587,8 +574,8 @@@ static void __cpuinit free_cache_attrib
        for (i = 0; i < num_cache_leaves; i++)
                cache_remove_shared_cpu_map(cpu, i);
  
-       kfree(per_cpu(cpuid4_info, cpu));
-       per_cpu(cpuid4_info, cpu) = NULL;
+       kfree(per_cpu(ici_cpuid4_info, cpu));
+       per_cpu(ici_cpuid4_info, cpu) = NULL;
  }
  
  static int
@@@ -614,15 -627,15 +614,15 @@@ static int __cpuinit detect_cache_attri
        if (num_cache_leaves == 0)
                return -ENOENT;
  
-       per_cpu(cpuid4_info, cpu) = kzalloc(
+       per_cpu(ici_cpuid4_info, cpu) = kzalloc(
            sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-       if (per_cpu(cpuid4_info, cpu) == NULL)
+       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                return -ENOMEM;
  
        smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
        if (retval) {
-               kfree(per_cpu(cpuid4_info, cpu));
-               per_cpu(cpuid4_info, cpu) = NULL;
+               kfree(per_cpu(ici_cpuid4_info, cpu));
+               per_cpu(ici_cpuid4_info, cpu) = NULL;
        }
  
        return retval;
  extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
  
  /* pointer to kobject for cpuX/cache */
- static DEFINE_PER_CPU(struct kobject *, cache_kobject);
+ static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
  
  struct _index_kobject {
        struct kobject kobj;
  };
  
  /* pointer to array of kobjects for cpuX/cache/indexY */
- static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
- #define INDEX_KOBJECT_PTR(x, y)               (&((per_cpu(index_kobject, x))[y]))
+ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
+ #define INDEX_KOBJECT_PTR(x, y)               (&((per_cpu(ici_index_kobject, x))[y]))
  
  #define show_one_plus(file_name, object, val)                         \
  static ssize_t show_##file_name                                               \
@@@ -863,10 -876,10 +863,10 @@@ static struct kobj_type ktype_percpu_en
  
  static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
  {
-       kfree(per_cpu(cache_kobject, cpu));
-       kfree(per_cpu(index_kobject, cpu));
-       per_cpu(cache_kobject, cpu) = NULL;
-       per_cpu(index_kobject, cpu) = NULL;
+       kfree(per_cpu(ici_cache_kobject, cpu));
+       kfree(per_cpu(ici_index_kobject, cpu));
+       per_cpu(ici_cache_kobject, cpu) = NULL;
+       per_cpu(ici_index_kobject, cpu) = NULL;
        free_cache_attributes(cpu);
  }
  
@@@ -882,14 -895,14 +882,14 @@@ static int __cpuinit cpuid4_cache_sysfs
                return err;
  
        /* Allocate all required memory */
-       per_cpu(cache_kobject, cpu) =
+       per_cpu(ici_cache_kobject, cpu) =
                kzalloc(sizeof(struct kobject), GFP_KERNEL);
-       if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
+       if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
                goto err_out;
  
-       per_cpu(index_kobject, cpu) = kzalloc(
+       per_cpu(ici_index_kobject, cpu) = kzalloc(
            sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
-       if (unlikely(per_cpu(index_kobject, cpu) == NULL))
+       if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
                goto err_out;
  
        return 0;
@@@ -913,7 -926,7 +913,7 @@@ static int __cpuinit cache_add_dev(stru
        if (unlikely(retval < 0))
                return retval;
  
-       retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+       retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
                                      &ktype_percpu_entry,
                                      &sys_dev->kobj, "%s", "cache");
        if (retval < 0) {
                this_object->index = i;
                retval = kobject_init_and_add(&(this_object->kobj),
                                              &ktype_cache,
-                                             per_cpu(cache_kobject, cpu),
+                                             per_cpu(ici_cache_kobject, cpu),
                                              "index%1lu", i);
                if (unlikely(retval)) {
                        for (j = 0; j < i; j++)
                                kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
-                       kobject_put(per_cpu(cache_kobject, cpu));
+                       kobject_put(per_cpu(ici_cache_kobject, cpu));
                        cpuid4_cache_sysfs_exit(cpu);
                        return retval;
                }
        }
        cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
  
-       kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
+       kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
        return 0;
  }
  
@@@ -949,7 -962,7 +949,7 @@@ static void __cpuinit cache_remove_dev(
        unsigned int cpu = sys_dev->id;
        unsigned long i;
  
-       if (per_cpu(cpuid4_info, cpu) == NULL)
+       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
                return;
        if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
                return;
  
        for (i = 0; i < num_cache_leaves; i++)
                kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
-       kobject_put(per_cpu(cache_kobject, cpu));
+       kobject_put(per_cpu(ici_cache_kobject, cpu));
        cpuid4_cache_sysfs_exit(cpu);
  }
  
diff --combined arch/x86/kvm/svm.c
index 3de0b37ec038673c3a70b4f14be7dcd5656dfcfe,6c79a14a3b6f8c784c6f6118ece14fd4e2bdd6b3..1d9b33843c80ef521dc059697285cfed06cfd7d7
@@@ -46,7 -46,6 +46,7 @@@ MODULE_LICENSE("GPL")
  #define SVM_FEATURE_NPT  (1 << 0)
  #define SVM_FEATURE_LBRV (1 << 1)
  #define SVM_FEATURE_SVML (1 << 2)
 +#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
  
  #define NESTED_EXIT_HOST      0       /* Exit handled on host level */
  #define NESTED_EXIT_DONE      1       /* Exit caused nested vmexit  */
  
  #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
  
 -/* Turn on to get debugging output*/
 -/* #define NESTED_DEBUG */
 -
 -#ifdef NESTED_DEBUG
 -#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
 -#else
 -#define nsvm_printk(fmt, args...) do {} while(0)
 -#endif
 -
  static const u32 host_save_user_msrs[] = {
  #ifdef CONFIG_X86_64
        MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@@ -77,9 -85,6 +77,9 @@@ struct nested_state 
        /* gpa pointers to the real vectors */
        u64 vmcb_msrpm;
  
 +      /* A VMEXIT is required but not yet emulated */
 +      bool exit_required;
 +
        /* cache for intercepts of the guest */
        u16 intercept_cr_read;
        u16 intercept_cr_write;
@@@ -107,8 -112,6 +107,8 @@@ struct vcpu_svm 
        u32 *msrpm;
  
        struct nested_state nested;
 +
 +      bool nmi_singlestep;
  };
  
  /* enable NPT for AMD64 and X86 with PAE */
@@@ -283,7 -286,7 +283,7 @@@ static void skip_emulated_instruction(s
        struct vcpu_svm *svm = to_svm(vcpu);
  
        if (!svm->next_rip) {
 -              if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
 +              if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
                                EMULATE_DONE)
                        printk(KERN_DEBUG "%s: NOP\n", __func__);
                return;
@@@ -313,81 -316,74 +313,79 @@@ static void svm_hardware_disable(void *
        cpu_svm_disable();
  }
  
 -static void svm_hardware_enable(void *garbage)
 +static int svm_hardware_enable(void *garbage)
  {
  
-       struct svm_cpu_data *svm_data;
+       struct svm_cpu_data *sd;
        uint64_t efer;
        struct descriptor_table gdt_descr;
        struct desc_struct *gdt;
        int me = raw_smp_processor_id();
  
 +      rdmsrl(MSR_EFER, efer);
 +      if (efer & EFER_SVME)
 +              return -EBUSY;
 +
        if (!has_svm()) {
 -              printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
 -              return;
 +              printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
 +                     me);
 +              return -EINVAL;
        }
-       svm_data = per_cpu(svm_data, me);
+       sd = per_cpu(svm_data, me);
  
-       if (!svm_data) {
+       if (!sd) {
 -              printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
 +              printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
                       me);
 -              return;
 +              return -EINVAL;
        }
  
-       svm_data->asid_generation = 1;
-       svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
-       svm_data->next_asid = svm_data->max_asid + 1;
+       sd->asid_generation = 1;
+       sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
+       sd->next_asid = sd->max_asid + 1;
  
        kvm_get_gdt(&gdt_descr);
        gdt = (struct desc_struct *)gdt_descr.base;
-       svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
+       sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
  
 -      rdmsrl(MSR_EFER, efer);
        wrmsrl(MSR_EFER, efer | EFER_SVME);
  
--      wrmsrl(MSR_VM_HSAVE_PA,
-              page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
 -             page_to_pfn(sd->save_area) << PAGE_SHIFT);
++      wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 +
 +      return 0;
  }
  
  static void svm_cpu_uninit(int cpu)
  {
-       struct svm_cpu_data *svm_data
-               = per_cpu(svm_data, raw_smp_processor_id());
+       struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
  
-       if (!svm_data)
+       if (!sd)
                return;
  
        per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-       __free_page(svm_data->save_area);
-       kfree(svm_data);
+       __free_page(sd->save_area);
+       kfree(sd);
  }
  
  static int svm_cpu_init(int cpu)
  {
-       struct svm_cpu_data *svm_data;
+       struct svm_cpu_data *sd;
        int r;
  
-       svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-       if (!svm_data)
+       sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
+       if (!sd)
                return -ENOMEM;
-       svm_data->cpu = cpu;
-       svm_data->save_area = alloc_page(GFP_KERNEL);
+       sd->cpu = cpu;
+       sd->save_area = alloc_page(GFP_KERNEL);
        r = -ENOMEM;
-       if (!svm_data->save_area)
+       if (!sd->save_area)
                goto err_1;
  
-       per_cpu(svm_data, cpu) = svm_data;
+       per_cpu(svm_data, cpu) = sd;
  
        return 0;
  
  err_1:
-       kfree(svm_data);
+       kfree(sd);
        return r;
  
  }
@@@ -479,7 -475,7 +477,7 @@@ static __init int svm_hardware_setup(vo
                kvm_enable_efer_bits(EFER_SVME);
        }
  
 -      for_each_online_cpu(cpu) {
 +      for_each_possible_cpu(cpu) {
                r = svm_cpu_init(cpu);
                if (r)
                        goto err;
@@@ -513,7 -509,7 +511,7 @@@ static __exit void svm_hardware_unsetup
  {
        int cpu;
  
 -      for_each_online_cpu(cpu)
 +      for_each_possible_cpu(cpu)
                svm_cpu_uninit(cpu);
  
        __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@@ -628,12 -624,11 +626,12 @@@ static void init_vmcb(struct vcpu_svm *
        save->rip = 0x0000fff0;
        svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
  
 -      /*
 -       * cr0 val on cpu init should be 0x60000010, we enable cpu
 -       * cache by default. the orderly way is to enable cache in bios.
 +      /* This is the guest-visible cr0 value.
 +       * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
         */
 -      save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
 +      svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
 +      kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
 +
        save->cr4 = X86_CR4_PAE;
        /* rdx = ?? */
  
                control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
                                                 INTERCEPT_CR3_MASK);
                save->g_pat = 0x0007040600070406ULL;
 -              /* enable caching because the QEMU Bios doesn't enable it */
 -              save->cr0 = X86_CR0_ET;
                save->cr3 = 0;
                save->cr4 = 0;
        }
        svm->nested.vmcb = 0;
        svm->vcpu.arch.hflags = 0;
  
 +      if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
 +              control->pause_filter_count = 3000;
 +              control->intercept |= (1ULL << INTERCEPT_PAUSE);
 +      }
 +
        enable_gif(svm);
  }
  
@@@ -765,16 -757,15 +763,16 @@@ static void svm_vcpu_load(struct kvm_vc
        int i;
  
        if (unlikely(cpu != vcpu->cpu)) {
 -              u64 tsc_this, delta;
 +              u64 delta;
  
                /*
                 * Make sure that the guest sees a monotonically
                 * increasing TSC.
                 */
 -              rdtscll(tsc_this);
 -              delta = vcpu->arch.host_tsc - tsc_this;
 +              delta = vcpu->arch.host_tsc - native_read_tsc();
                svm->vmcb->control.tsc_offset += delta;
 +              if (is_nested(svm))
 +                      svm->nested.hsave->control.tsc_offset += delta;
                vcpu->cpu = cpu;
                kvm_migrate_timers(vcpu);
                svm->asid_generation = 0;
@@@ -793,7 -784,7 +791,7 @@@ static void svm_vcpu_put(struct kvm_vcp
        for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
                wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
  
 -      rdtscll(vcpu->arch.host_tsc);
 +      vcpu->arch.host_tsc = native_read_tsc();
  }
  
  static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@@ -1051,7 -1042,7 +1049,7 @@@ static void update_db_intercept(struct 
        svm->vmcb->control.intercept_exceptions &=
                ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
  
 -      if (vcpu->arch.singlestep)
 +      if (svm->nmi_singlestep)
                svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
  
        if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
                vcpu->guest_debug = 0;
  }
  
 -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 +static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
  {
 -      int old_debug = vcpu->guest_debug;
        struct vcpu_svm *svm = to_svm(vcpu);
  
 -      vcpu->guest_debug = dbg->control;
 -
 -      update_db_intercept(vcpu);
 -
        if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
                svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
        else
                svm->vmcb->save.dr7 = vcpu->arch.dr7;
  
 -      if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 -              svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
 -      else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
 -              svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
 -
 -      return 0;
 +      update_db_intercept(vcpu);
  }
  
  static void load_host_msrs(struct kvm_vcpu *vcpu)
@@@ -1092,16 -1093,16 +1090,16 @@@ static void save_host_msrs(struct kvm_v
  #endif
  }
  
- static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
+ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
  {
-       if (svm_data->next_asid > svm_data->max_asid) {
-               ++svm_data->asid_generation;
-               svm_data->next_asid = 1;
+       if (sd->next_asid > sd->max_asid) {
+               ++sd->asid_generation;
+               sd->next_asid = 1;
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
        }
  
-       svm->asid_generation = svm_data->asid_generation;
-       svm->vmcb->control.asid = svm_data->next_asid++;
+       svm->asid_generation = sd->asid_generation;
+       svm->vmcb->control.asid = sd->next_asid++;
  }
  
  static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
@@@ -1176,7 -1177,7 +1174,7 @@@ static void svm_set_dr(struct kvm_vcpu 
        }
  }
  
 -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int pf_interception(struct vcpu_svm *svm)
  {
        u64 fault_address;
        u32 error_code;
        return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
  }
  
 -static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int db_interception(struct vcpu_svm *svm)
  {
 +      struct kvm_run *kvm_run = svm->vcpu.run;
 +
        if (!(svm->vcpu.guest_debug &
              (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
 -              !svm->vcpu.arch.singlestep) {
 +              !svm->nmi_singlestep) {
                kvm_queue_exception(&svm->vcpu, DB_VECTOR);
                return 1;
        }
  
 -      if (svm->vcpu.arch.singlestep) {
 -              svm->vcpu.arch.singlestep = false;
 +      if (svm->nmi_singlestep) {
 +              svm->nmi_singlestep = false;
                if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
                        svm->vmcb->save.rflags &=
                                ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
        return 1;
  }
  
 -static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int bp_interception(struct vcpu_svm *svm)
  {
 +      struct kvm_run *kvm_run = svm->vcpu.run;
 +
        kvm_run->exit_reason = KVM_EXIT_DEBUG;
        kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
        kvm_run->debug.arch.exception = BP_VECTOR;
        return 0;
  }
  
 -static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int ud_interception(struct vcpu_svm *svm)
  {
        int er;
  
 -      er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
 +      er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
        if (er != EMULATE_DONE)
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
  }
  
 -static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int nm_interception(struct vcpu_svm *svm)
  {
        svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
        if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
        return 1;
  }
  
 -static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int mc_interception(struct vcpu_svm *svm)
  {
        /*
         * On an #MC intercept the MCE handler is not called automatically in
        return 1;
  }
  
 -static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int shutdown_interception(struct vcpu_svm *svm)
  {
 +      struct kvm_run *kvm_run = svm->vcpu.run;
 +
        /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
        return 0;
  }
  
 -static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int io_interception(struct vcpu_svm *svm)
  {
        u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
        int size, in, string;
  
        if (string) {
                if (emulate_instruction(&svm->vcpu,
 -                                      kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
 +                                      0, 0, 0) == EMULATE_DO_MMIO)
                        return 0;
                return 1;
        }
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
  
        skip_emulated_instruction(&svm->vcpu);
 -      return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 +      return kvm_emulate_pio(&svm->vcpu, in, size, port);
  }
  
 -static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int nmi_interception(struct vcpu_svm *svm)
  {
        return 1;
  }
  
 -static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int intr_interception(struct vcpu_svm *svm)
  {
        ++svm->vcpu.stat.irq_exits;
        return 1;
  }
  
 -static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int nop_on_interception(struct vcpu_svm *svm)
  {
        return 1;
  }
  
 -static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int halt_interception(struct vcpu_svm *svm)
  {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
        skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_halt(&svm->vcpu);
  }
  
 -static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int vmmcall_interception(struct vcpu_svm *svm)
  {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
@@@ -1380,15 -1375,8 +1378,15 @@@ static inline int nested_svm_intr(struc
  
        svm->vmcb->control.exit_code = SVM_EXIT_INTR;
  
 -      if (nested_svm_exit_handled(svm)) {
 -              nsvm_printk("VMexit -> INTR\n");
 +      if (svm->nested.intercept & 1ULL) {
 +              /*
 +               * The #vmexit can't be emulated here directly because this
 +               * code path runs with irqs and preemtion disabled. A
 +               * #vmexit emulation might sleep. Only signal request for
 +               * the #vmexit here.
 +               */
 +              svm->nested.exit_required = true;
 +              trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
                return 1;
        }
  
@@@ -1399,7 -1387,10 +1397,7 @@@ static void *nested_svm_map(struct vcpu
  {
        struct page *page;
  
 -      down_read(&current->mm->mmap_sem);
        page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
 -      up_read(&current->mm->mmap_sem);
 -
        if (is_error_page(page))
                goto error;
  
@@@ -1538,12 -1529,14 +1536,12 @@@ static int nested_svm_exit_handled(stru
        }
        default: {
                u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
 -              nsvm_printk("exit code: 0x%x\n", exit_code);
                if (svm->nested.intercept & exit_bits)
                        vmexit = NESTED_EXIT_DONE;
        }
        }
  
        if (vmexit == NESTED_EXIT_DONE) {
 -              nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
                nested_svm_vmexit(svm);
        }
  
@@@ -1588,12 -1581,6 +1586,12 @@@ static int nested_svm_vmexit(struct vcp
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
  
 +      trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
 +                                     vmcb->control.exit_info_1,
 +                                     vmcb->control.exit_info_2,
 +                                     vmcb->control.exit_int_info,
 +                                     vmcb->control.exit_int_info_err);
 +
        nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
        if (!nested_vmcb)
                return 1;
        nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
        nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
        nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
 +
 +      /*
 +       * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
 +       * to make sure that we do not lose injected events. So check event_inj
 +       * here and copy it to exit_int_info if it is valid.
 +       * Exit_int_info and event_inj can't be both valid because the case
 +       * below only happens on a VMRUN instruction intercept which has
 +       * no valid exit_int_info set.
 +       */
 +      if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
 +              struct vmcb_control_area *nc = &nested_vmcb->control;
 +
 +              nc->exit_int_info     = vmcb->control.event_inj;
 +              nc->exit_int_info_err = vmcb->control.event_inj_err;
 +      }
 +
        nested_vmcb->control.tlb_ctl           = 0;
        nested_vmcb->control.event_inj         = 0;
        nested_vmcb->control.event_inj_err     = 0;
        /* Restore the original control entries */
        copy_vmcb_control_area(vmcb, hsave);
  
 -      /* Kill any pending exceptions */
 -      if (svm->vcpu.arch.exception.pending == true)
 -              nsvm_printk("WARNING: Pending Exception\n");
 -
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
  
@@@ -1724,12 -1699,6 +1722,12 @@@ static bool nested_svm_vmrun(struct vcp
        /* nested_vmcb is our indicator if nested SVM is activated */
        svm->nested.vmcb = svm->vmcb->save.rax;
  
 +      trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
 +                             nested_vmcb->save.rip,
 +                             nested_vmcb->control.int_ctl,
 +                             nested_vmcb->control.event_inj,
 +                             nested_vmcb->control.nested_ctl);
 +
        /* Clear internal status */
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
        svm->nested.intercept            = nested_vmcb->control.intercept;
  
        force_new_asid(&svm->vcpu);
 -      svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
 -      svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
        svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
 -      if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
 -              nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
 -                              nested_vmcb->control.int_ctl);
 -      }
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
        else
                svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
  
 -      nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
 -                      nested_vmcb->control.exit_int_info,
 -                      nested_vmcb->control.int_state);
 -
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
        svm->vmcb->control.int_state = nested_vmcb->control.int_state;
        svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
 -      if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
 -              nsvm_printk("Injecting Event: 0x%x\n",
 -                              nested_vmcb->control.event_inj);
        svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
        svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
  
@@@ -1852,7 -1834,7 +1850,7 @@@ static void nested_svm_vmloadsave(struc
        to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
  }
  
 -static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int vmload_interception(struct vcpu_svm *svm)
  {
        struct vmcb *nested_vmcb;
  
        return 1;
  }
  
 -static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int vmsave_interception(struct vcpu_svm *svm)
  {
        struct vmcb *nested_vmcb;
  
        return 1;
  }
  
 -static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int vmrun_interception(struct vcpu_svm *svm)
  {
 -      nsvm_printk("VMrun\n");
 -
        if (nested_svm_check_permissions(svm))
                return 1;
  
@@@ -1920,7 -1904,7 +1918,7 @@@ failed
        return 1;
  }
  
 -static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int stgi_interception(struct vcpu_svm *svm)
  {
        if (nested_svm_check_permissions(svm))
                return 1;
        return 1;
  }
  
 -static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int clgi_interception(struct vcpu_svm *svm)
  {
        if (nested_svm_check_permissions(svm))
                return 1;
        return 1;
  }
  
 -static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int invlpga_interception(struct vcpu_svm *svm)
  {
        struct kvm_vcpu *vcpu = &svm->vcpu;
 -      nsvm_printk("INVLPGA\n");
 +
 +      trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
 +                        vcpu->arch.regs[VCPU_REGS_RAX]);
  
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
        kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
        return 1;
  }
  
 -static int invalid_op_interception(struct vcpu_svm *svm,
 -                                 struct kvm_run *kvm_run)
 +static int skinit_interception(struct vcpu_svm *svm)
  {
 +      trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
 +
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
  }
  
 -static int task_switch_interception(struct vcpu_svm *svm,
 -                                  struct kvm_run *kvm_run)
 +static int invalid_op_interception(struct vcpu_svm *svm)
 +{
 +      kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 +      return 1;
 +}
 +
 +static int task_switch_interception(struct vcpu_svm *svm)
  {
        u16 tss_selector;
        int reason;
        return kvm_task_switch(&svm->vcpu, tss_selector, reason);
  }
  
 -static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int cpuid_interception(struct vcpu_svm *svm)
  {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
        kvm_emulate_cpuid(&svm->vcpu);
        return 1;
  }
  
 -static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int iret_interception(struct vcpu_svm *svm)
  {
        ++svm->vcpu.stat.nmi_window_exits;
        svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
        return 1;
  }
  
 -static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int invlpg_interception(struct vcpu_svm *svm)
  {
 -      if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
 +      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
        return 1;
  }
  
 -static int emulate_on_interception(struct vcpu_svm *svm,
 -                                 struct kvm_run *kvm_run)
 +static int emulate_on_interception(struct vcpu_svm *svm)
  {
 -      if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
 +      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
                pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
        return 1;
  }
  
 -static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int cr8_write_interception(struct vcpu_svm *svm)
  {
 +      struct kvm_run *kvm_run = svm->vcpu.run;
 +
        u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
        /* instruction emulation calls kvm_set_cr8() */
 -      emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
 +      emulate_instruction(&svm->vcpu, 0, 0, 0);
        if (irqchip_in_kernel(svm->vcpu.kvm)) {
                svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
                return 1;
@@@ -2081,14 -2056,10 +2079,14 @@@ static int svm_get_msr(struct kvm_vcpu 
  
        switch (ecx) {
        case MSR_IA32_TSC: {
 -              u64 tsc;
 +              u64 tsc_offset;
  
 -              rdtscll(tsc);
 -              *data = svm->vmcb->control.tsc_offset + tsc;
 +              if (is_nested(svm))
 +                      tsc_offset = svm->nested.hsave->control.tsc_offset;
 +              else
 +                      tsc_offset = svm->vmcb->control.tsc_offset;
 +
 +              *data = tsc_offset + native_read_tsc();
                break;
        }
        case MSR_K6_STAR:
        return 0;
  }
  
 -static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int rdmsr_interception(struct vcpu_svm *svm)
  {
        u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
        u64 data;
@@@ -2174,17 -2145,10 +2172,17 @@@ static int svm_set_msr(struct kvm_vcpu 
  
        switch (ecx) {
        case MSR_IA32_TSC: {
 -              u64 tsc;
 +              u64 tsc_offset = data - native_read_tsc();
 +              u64 g_tsc_offset = 0;
 +
 +              if (is_nested(svm)) {
 +                      g_tsc_offset = svm->vmcb->control.tsc_offset -
 +                                     svm->nested.hsave->control.tsc_offset;
 +                      svm->nested.hsave->control.tsc_offset = tsc_offset;
 +              }
 +
 +              svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
  
 -              rdtscll(tsc);
 -              svm->vmcb->control.tsc_offset = data - tsc;
                break;
        }
        case MSR_K6_STAR:
        return 0;
  }
  
 -static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int wrmsr_interception(struct vcpu_svm *svm)
  {
        u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
        u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
        return 1;
  }
  
 -static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +static int msr_interception(struct vcpu_svm *svm)
  {
        if (svm->vmcb->control.exit_info_1)
 -              return wrmsr_interception(svm, kvm_run);
 +              return wrmsr_interception(svm);
        else
 -              return rdmsr_interception(svm, kvm_run);
 +              return rdmsr_interception(svm);
  }
  
 -static int interrupt_window_interception(struct vcpu_svm *svm,
 -                                 struct kvm_run *kvm_run)
 +static int interrupt_window_interception(struct vcpu_svm *svm)
  {
 +      struct kvm_run *kvm_run = svm->vcpu.run;
 +
        svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
        /*
        return 1;
  }
  
 -static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 -                                    struct kvm_run *kvm_run) = {
 +static int pause_interception(struct vcpu_svm *svm)
 +{
 +      kvm_vcpu_on_spin(&(svm->vcpu));
 +      return 1;
 +}
 +
 +static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
        [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
        [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
        [SVM_EXIT_CPUID]                        = cpuid_interception,
        [SVM_EXIT_IRET]                         = iret_interception,
        [SVM_EXIT_INVD]                         = emulate_on_interception,
 +      [SVM_EXIT_PAUSE]                        = pause_interception,
        [SVM_EXIT_HLT]                          = halt_interception,
        [SVM_EXIT_INVLPG]                       = invlpg_interception,
        [SVM_EXIT_INVLPGA]                      = invlpga_interception,
        [SVM_EXIT_VMSAVE]                       = vmsave_interception,
        [SVM_EXIT_STGI]                         = stgi_interception,
        [SVM_EXIT_CLGI]                         = clgi_interception,
 -      [SVM_EXIT_SKINIT]                       = invalid_op_interception,
 +      [SVM_EXIT_SKINIT]                       = skinit_interception,
        [SVM_EXIT_WBINVD]                       = emulate_on_interception,
        [SVM_EXIT_MONITOR]                      = invalid_op_interception,
        [SVM_EXIT_MWAIT]                        = invalid_op_interception,
        [SVM_EXIT_NPF]                          = pf_interception,
  };
  
 -static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 +static int handle_exit(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
 +      struct kvm_run *kvm_run = vcpu->run;
        u32 exit_code = svm->vmcb->control.exit_code;
  
        trace_kvm_exit(exit_code, svm->vmcb->save.rip);
  
 +      if (unlikely(svm->nested.exit_required)) {
 +              nested_svm_vmexit(svm);
 +              svm->nested.exit_required = false;
 +
 +              return 1;
 +      }
 +
        if (is_nested(svm)) {
                int vmexit;
  
 -              nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
 -                          exit_code, svm->vmcb->control.exit_info_1,
 -                          svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
 +              trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
 +                                      svm->vmcb->control.exit_info_1,
 +                                      svm->vmcb->control.exit_info_2,
 +                                      svm->vmcb->control.exit_int_info,
 +                                      svm->vmcb->control.exit_int_info_err);
  
                vmexit = nested_svm_exit_special(svm);
  
                return 0;
        }
  
 -      return svm_exit_handlers[exit_code](svm, kvm_run);
 +      return svm_exit_handlers[exit_code](svm);
  }
  
  static void reload_tss(struct kvm_vcpu *vcpu)
  {
        int cpu = raw_smp_processor_id();
  
-       struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
-       svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       sd->tss_desc->type = 9; /* available 32/64-bit TSS */
        load_TR_desc();
  }
  
@@@ -2438,12 -2385,12 +2436,12 @@@ static void pre_svm_run(struct vcpu_sv
  {
        int cpu = raw_smp_processor_id();
  
-       struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
  
        svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
        /* FIXME: handle wraparound of asid_generation */
-       if (svm->asid_generation != svm_data->asid_generation)
-               new_asid(svm, svm_data);
+       if (svm->asid_generation != sd->asid_generation)
+               new_asid(svm, sd);
  }
  
  static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@@ -2499,47 -2446,20 +2497,47 @@@ static int svm_nmi_allowed(struct kvm_v
                !(svm->vcpu.arch.hflags & HF_NMI_MASK);
  }
  
 +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
 +{
 +      struct vcpu_svm *svm = to_svm(vcpu);
 +
 +      return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 +}
 +
 +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 +{
 +      struct vcpu_svm *svm = to_svm(vcpu);
 +
 +      if (masked) {
 +              svm->vcpu.arch.hflags |= HF_NMI_MASK;
 +              svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
 +      } else {
 +              svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
 +              svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
 +      }
 +}
 +
  static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb;
 -      return (vmcb->save.rflags & X86_EFLAGS_IF) &&
 -              !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 -              gif_set(svm) &&
 -              !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
 +      int ret;
 +
 +      if (!gif_set(svm) ||
 +           (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
 +              return 0;
 +
 +      ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
 +
 +      if (is_nested(svm))
 +              return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
 +
 +      return ret;
  }
  
  static void enable_irq_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
 -      nsvm_printk("Trying to open IRQ window\n");
  
        nested_svm_intr(svm);
  
@@@ -2564,7 -2484,7 +2562,7 @@@ static void enable_nmi_window(struct kv
        /* Something prevents NMI from been injected. Single step over
           possible problem (IRET or exception injection or interrupt
           shadow) */
 -      vcpu->arch.singlestep = true;
 +      svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
        update_db_intercept(vcpu);
  }
@@@ -2654,20 -2574,13 +2652,20 @@@ static void svm_complete_interrupts(str
  #define R "e"
  #endif
  
 -static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 +static void svm_vcpu_run(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
        u16 fs_selector;
        u16 gs_selector;
        u16 ldt_selector;
  
 +      /*
 +       * A vmexit emulation is required before the vcpu can be executed
 +       * again.
 +       */
 +      if (unlikely(svm->nested.exit_required))
 +              return;
 +
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
        svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
        svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@@ -2966,8 -2879,6 +2964,8 @@@ static struct kvm_x86_ops svm_x86_ops 
        .queue_exception = svm_queue_exception,
        .interrupt_allowed = svm_interrupt_allowed,
        .nmi_allowed = svm_nmi_allowed,
 +      .get_nmi_mask = svm_get_nmi_mask,
 +      .set_nmi_mask = svm_set_nmi_mask,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
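
The svm.c hunks above all apply one conversion: every exit handler drops its struct kvm_run * parameter, and the handlers that still need the run structure reach it through the vcpu, as cr8_write_interception() and interrupt_window_interception() now do. A minimal sketch of the resulting handler shape, as it would sit inside svm.c; the handler name and the exit_reason assignment are illustrative only, not taken from the patch:

static int example_interception(struct vcpu_svm *svm)
{
        /* the run structure is reached via the vcpu, not passed in */
        struct kvm_run *kvm_run = svm->vcpu.run;

        kvm_run->exit_reason = KVM_EXIT_UNKNOWN;  /* illustrative use only */
        return 1;                                 /* non-zero: keep running the guest */
}
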
diff --combined arch/x86/xen/smp.c
index 64757c0ba5fc2255a4d9188b3a68636832a73a1f,1167d9830f5f9c5cf259e4fba0273b8dd424993c..563d20504988ef7671cf600cdc2762f00418a4c8
  
  cpumask_var_t xen_cpu_initialized_map;
  
- static DEFINE_PER_CPU(int, resched_irq);
- static DEFINE_PER_CPU(int, callfunc_irq);
- static DEFINE_PER_CPU(int, callfuncsingle_irq);
- static DEFINE_PER_CPU(int, debug_irq) = -1;
+ static DEFINE_PER_CPU(int, xen_resched_irq);
+ static DEFINE_PER_CPU(int, xen_callfunc_irq);
+ static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+ static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
  
  static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
  static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@@ -73,7 -73,7 +73,7 @@@ static __cpuinit void cpu_bringup(void
  
        xen_setup_cpu_clockevents();
  
 -      cpu_set(cpu, cpu_online_map);
 +      set_cpu_online(cpu, true);
        percpu_write(cpu_state, CPU_ONLINE);
        wmb();
  
@@@ -103,7 -103,7 +103,7 @@@ static int xen_smp_intr_init(unsigned i
                                    NULL);
        if (rc < 0)
                goto fail;
-       per_cpu(resched_irq, cpu) = rc;
+       per_cpu(xen_resched_irq, cpu) = rc;
  
        callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                    NULL);
        if (rc < 0)
                goto fail;
-       per_cpu(callfunc_irq, cpu) = rc;
+       per_cpu(xen_callfunc_irq, cpu) = rc;
  
        debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
        rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
                                     debug_name, NULL);
        if (rc < 0)
                goto fail;
-       per_cpu(debug_irq, cpu) = rc;
+       per_cpu(xen_debug_irq, cpu) = rc;
  
        callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
                                    NULL);
        if (rc < 0)
                goto fail;
-       per_cpu(callfuncsingle_irq, cpu) = rc;
+       per_cpu(xen_callfuncsingle_irq, cpu) = rc;
  
        return 0;
  
   fail:
-       if (per_cpu(resched_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-       if (per_cpu(callfunc_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-       if (per_cpu(debug_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-       if (per_cpu(callfuncsingle_irq, cpu) >= 0)
-               unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+       if (per_cpu(xen_resched_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+       if (per_cpu(xen_callfunc_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+       if (per_cpu(xen_debug_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+       if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
+               unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
+                                      NULL);
  
        return rc;
  }
@@@ -295,7 -296,6 +296,7 @@@ static int __cpuinit xen_cpu_up(unsigne
                (unsigned long)task_stack_page(idle) -
                KERNEL_STACK_OFFSET + THREAD_SIZE;
  #endif
 +      xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_init_lock_cpu(cpu);
  
@@@ -349,10 -349,10 +350,10 @@@ static void xen_cpu_die(unsigned int cp
                current->state = TASK_UNINTERRUPTIBLE;
                schedule_timeout(HZ/10);
        }
-       unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
-       unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+       unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);
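
The xen/smp.c rename follows the theme of this merge: per-cpu symbols are being made unique across the tree in preparation for sharing the ordinary symbol namespace, so short file-local names such as resched_irq or debug_irq gain a xen_ prefix. The declaration/access pair keeps its usual shape; a minimal sketch with a made-up variable name (xen_example_irq) and helper:

#include <linux/percpu.h>

/* prefixed so the per-cpu symbol stays unique kernel-wide */
static DEFINE_PER_CPU(int, xen_example_irq) = -1;

static void example_record_irq(int cpu, int irq)
{
        per_cpu(xen_example_irq, cpu) = irq;  /* indexed access to a given CPU's copy */
}
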
  
diff --combined arch/x86/xen/time.c
index 9d1f853120d859cfc814e0f2eaa2e01b3f1575e8,26e37b787ad30f7665a015df7a094dfc537e77a0..0d3f07cd1b5fe9aee977674b11b0beb311e25eb0
  #define NS_PER_TICK   (1000000000LL / HZ)
  
  /* runstate info updated by Xen */
- static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
  
  /* snapshots of runstate info */
- static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
  
  /* unused ns of stolen and blocked time */
- static DEFINE_PER_CPU(u64, residual_stolen);
- static DEFINE_PER_CPU(u64, residual_blocked);
+ static DEFINE_PER_CPU(u64, xen_residual_stolen);
+ static DEFINE_PER_CPU(u64, xen_residual_blocked);
  
  /* return an consistent snapshot of 64-bit time/counter value */
  static u64 get64(const u64 *p)
@@@ -79,7 -79,7 +79,7 @@@ static void get_runstate_snapshot(struc
  
        BUG_ON(preemptible());
  
-       state = &__get_cpu_var(runstate);
+       state = &__get_cpu_var(xen_runstate);
  
        /*
         * The runstate info is always updated by the hypervisor on
  /* return true when a vcpu could run but has no real cpu to run on */
  bool xen_vcpu_stolen(int vcpu)
  {
-       return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
+       return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
  }
  
 -static void setup_runstate_info(int cpu)
 +void xen_setup_runstate_info(int cpu)
  {
        struct vcpu_register_runstate_memory_area area;
  
-       area.addr.v = &per_cpu(runstate, cpu);
+       area.addr.v = &per_cpu(xen_runstate, cpu);
  
        if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
                               cpu, &area))
@@@ -122,7 -122,7 +122,7 @@@ static void do_stolen_accounting(void
  
        WARN_ON(state.state != RUNSTATE_running);
  
-       snap = &__get_cpu_var(runstate_snapshot);
+       snap = &__get_cpu_var(xen_runstate_snapshot);
  
        /* work out how much time the VCPU has not been runn*ing*  */
        blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
  
        /* Add the appropriate number of ticks of stolen time,
           including any left-overs from last time. */
-       stolen = runnable + offline + __get_cpu_var(residual_stolen);
+       stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);
  
        if (stolen < 0)
                stolen = 0;
  
        ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-       __get_cpu_var(residual_stolen) = stolen;
+       __get_cpu_var(xen_residual_stolen) = stolen;
        account_steal_ticks(ticks);
  
        /* Add the appropriate number of ticks of blocked time,
           including any left-overs from last time. */
-       blocked += __get_cpu_var(residual_blocked);
+       blocked += __get_cpu_var(xen_residual_blocked);
  
        if (blocked < 0)
                blocked = 0;
  
        ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
-       __get_cpu_var(residual_blocked) = blocked;
+       __get_cpu_var(xen_residual_blocked) = blocked;
        account_idle_ticks(ticks);
  }
  
@@@ -434,7 -434,7 +434,7 @@@ void xen_setup_timer(int cpu
                name = "<timer kasprintf failed>";
  
        irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
 -                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
 +                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
                                      name, NULL);
  
        evt = &per_cpu(xen_clock_events, cpu);
  
        evt->cpumask = cpumask_of(cpu);
        evt->irq = irq;
 -
 -      setup_runstate_info(cpu);
  }
  
  void xen_teardown_timer(int cpu)
@@@ -492,7 -494,6 +492,7 @@@ __init void xen_time_init(void
  
        setup_force_cpu_cap(X86_FEATURE_TSC);
  
 +      xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
  }
diff --combined crypto/cryptd.c
index f8ae0d94a6471e0703bfc24900f88dfeb1fc8be7,3d7fe8306e2a4377ecea748543ffccdb71d1b6cb..704c141153236917288ee7a09c12ce8b037754d8
@@@ -99,7 -99,7 +99,7 @@@ static int cryptd_enqueue_request(struc
        struct cryptd_cpu_queue *cpu_queue;
  
        cpu = get_cpu();
-       cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+       cpu_queue = this_cpu_ptr(queue->cpu_queue);
        err = crypto_enqueue_request(&cpu_queue->queue, request);
        queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
        put_cpu();
@@@ -711,13 -711,6 +711,13 @@@ struct crypto_shash *cryptd_ahash_child
  }
  EXPORT_SYMBOL_GPL(cryptd_ahash_child);
  
 +struct shash_desc *cryptd_shash_desc(struct ahash_request *req)
 +{
 +      struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
 +      return &rctx->desc;
 +}
 +EXPORT_SYMBOL_GPL(cryptd_shash_desc);
 +
  void cryptd_free_ahash(struct cryptd_ahash *tfm)
  {
        crypto_free_ahash(&tfm->base);
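
In cryptd_enqueue_request() the CPU is already pinned by get_cpu(), so per_cpu_ptr(queue->cpu_queue, cpu) can become this_cpu_ptr(queue->cpu_queue), which resolves the address through the local per-cpu offset directly. A sketch of the same pattern with hypothetical names; example_queues is assumed to have been set up with alloc_percpu():

#include <linux/kernel.h>
#include <linux/percpu.h>

struct example_cpu_queue {
        int pending;
};

static struct example_cpu_queue *example_queues;  /* from alloc_percpu() */

static void example_enqueue(void)
{
        struct example_cpu_queue *q;
        int cpu;

        cpu = get_cpu();                   /* disables preemption, returns this CPU */
        q = this_cpu_ptr(example_queues);  /* same as per_cpu_ptr(example_queues, cpu) */
        q->pending++;
        pr_debug("queued on cpu %d\n", cpu);
        put_cpu();
}
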
diff --combined drivers/base/cpu.c
index 27fd775375b04965b834d7817db43efa059a2c97,69ee5b7517ecfa400a6ccbe111ccfe311d6eb7db..958bd1540c303d92f84cbcea016c7da4c0ce925c
@@@ -35,7 -35,6 +35,7 @@@ static ssize_t __ref store_online(struc
        struct cpu *cpu = container_of(dev, struct cpu, sysdev);
        ssize_t ret;
  
 +      cpu_hotplug_driver_lock();
        switch (buf[0]) {
        case '0':
                ret = cpu_down(cpu->sysdev.id);
@@@ -50,7 -49,6 +50,7 @@@
        default:
                ret = -EINVAL;
        }
 +      cpu_hotplug_driver_unlock();
  
        if (ret >= 0)
                ret = count;
@@@ -74,38 -72,6 +74,38 @@@ void unregister_cpu(struct cpu *cpu
        per_cpu(cpu_sys_devices, logical_cpu) = NULL;
        return;
  }
 +
 +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 +static ssize_t cpu_probe_store(struct class *class, const char *buf,
 +                             size_t count)
 +{
 +      return arch_cpu_probe(buf, count);
 +}
 +
 +static ssize_t cpu_release_store(struct class *class, const char *buf,
 +                               size_t count)
 +{
 +      return arch_cpu_release(buf, count);
 +}
 +
 +static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
 +static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
 +
 +int __init cpu_probe_release_init(void)
 +{
 +      int rc;
 +
 +      rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
 +                             &class_attr_probe.attr);
 +      if (!rc)
 +              rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
 +                                     &class_attr_release.attr);
 +
 +      return rc;
 +}
 +device_initcall(cpu_probe_release_init);
 +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 +
  #else /* ... !CONFIG_HOTPLUG_CPU */
  static inline void register_cpu_control(struct cpu *cpu)
  {
@@@ -131,7 -97,7 +131,7 @@@ static ssize_t show_crash_notes(struct 
         * boot up and this data does not change there after. Hence this
         * operation should be safe. No locking required.
         */
-       addr = __pa(per_cpu_ptr(crash_notes, cpunum));
+       addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));
        rc = sprintf(buf, "%Lx\n", addr);
        return rc;
  }
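
The crash_notes change above swaps __pa() for per_cpu_ptr_to_phys() because a dynamically allocated per-cpu area is not guaranteed to live in the linear kernel mapping (it may be vmalloc-backed), and __pa() is only meaningful for linearly mapped addresses. A sketch of the translation step, with a hypothetical helper name and a plain void * base:

#include <linux/percpu.h>
#include <linux/types.h>

static phys_addr_t example_percpu_phys(void *pcpu_base, int cpu)
{
        /* valid even when the per-cpu area sits outside the linear map */
        return per_cpu_ptr_to_phys(per_cpu_ptr(pcpu_base, cpu));
}
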
diff --combined drivers/cpufreq/cpufreq.c
index f20668c09ce0611d3da1fa8582514ccfec949347,af93a8175c5eee0bef7f15fe2d7de4854c88d064..67bc2ece7b4b508da7855937a01eb388cd33ab5b
@@@ -41,7 -41,7 +41,7 @@@ static struct cpufreq_driver *cpufreq_d
  static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
  #ifdef CONFIG_HOTPLUG_CPU
  /* This one keeps track of the previously set governor of a removed CPU */
 -static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
 +static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
  #endif
  static DEFINE_SPINLOCK(cpufreq_driver_lock);
  
   * - Lock should not be held across
   *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
   */
- static DEFINE_PER_CPU(int, policy_cpu);
+ static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
  static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
  
  #define lock_policy_rwsem(mode, cpu)                                  \
  int lock_policy_rwsem_##mode                                          \
  (int cpu)                                                             \
  {                                                                     \
-       int policy_cpu = per_cpu(policy_cpu, cpu);                      \
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
@@@ -90,7 -90,7 +90,7 @@@ EXPORT_SYMBOL_GPL(lock_policy_rwsem_wri
  
  void unlock_policy_rwsem_read(int cpu)
  {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
  }
@@@ -98,7 -98,7 +98,7 @@@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_r
  
  void unlock_policy_rwsem_write(int cpu)
  {
-       int policy_cpu = per_cpu(policy_cpu, cpu);
+       int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
  }
@@@ -647,21 -647,6 +647,21 @@@ static ssize_t show_scaling_setspeed(st
        return policy->governor->show_setspeed(policy, buf);
  }
  
 +/**
 + * show_scaling_driver - show the current cpufreq HW/BIOS limitation
 + */
 +static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
 +{
 +      unsigned int limit;
 +      int ret;
 +      if (cpufreq_driver->bios_limit) {
 +              ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
 +              if (!ret)
 +                      return sprintf(buf, "%u\n", limit);
 +      }
 +      return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
 +}
 +
  #define define_one_ro(_name) \
  static struct freq_attr _name = \
  __ATTR(_name, 0444, show_##_name, NULL)
@@@ -681,7 -666,6 +681,7 @@@ define_one_ro(cpuinfo_transition_latenc
  define_one_ro(scaling_available_governors);
  define_one_ro(scaling_driver);
  define_one_ro(scaling_cur_freq);
 +define_one_ro(bios_limit);
  define_one_ro(related_cpus);
  define_one_ro(affected_cpus);
  define_one_rw(scaling_min_freq);
@@@ -783,20 -767,17 +783,20 @@@ static struct kobj_type ktype_cpufreq 
   *   0:        Success
   *   Positive: When we have a managed CPU and the sysfs got symlinked
   */
 -int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy,
 -              struct sys_device *sys_dev)
 +static int cpufreq_add_dev_policy(unsigned int cpu,
 +                                struct cpufreq_policy *policy,
 +                                struct sys_device *sys_dev)
  {
        int ret = 0;
  #ifdef CONFIG_SMP
        unsigned long flags;
        unsigned int j;
 -
  #ifdef CONFIG_HOTPLUG_CPU
 -      if (per_cpu(cpufreq_cpu_governor, cpu)) {
 -              policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
 +      struct cpufreq_governor *gov;
 +
 +      gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
 +      if (gov) {
 +              policy->governor = gov;
                dprintk("Restoring governor %s for cpu %d\n",
                       policy->governor->name, cpu);
        }
  
                        /* Set proper policy_cpu */
                        unlock_policy_rwsem_write(cpu);
-                       per_cpu(policy_cpu, cpu) = managed_policy->cpu;
+                       per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
  
                        if (lock_policy_rwsem_write(cpu) < 0) {
                                /* Should not go through policy unlock path */
  
  
  /* symlink affected CPUs */
 -int cpufreq_add_dev_symlink(unsigned int cpu, struct cpufreq_policy *policy)
 +static int cpufreq_add_dev_symlink(unsigned int cpu,
 +                                 struct cpufreq_policy *policy)
  {
        unsigned int j;
        int ret = 0;
        return ret;
  }
  
 -int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy,
 -              struct sys_device *sys_dev)
 +static int cpufreq_add_dev_interface(unsigned int cpu,
 +                                   struct cpufreq_policy *policy,
 +                                   struct sys_device *sys_dev)
  {
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
                if (ret)
                        goto err_out_kobj_put;
        }
 +      if (cpufreq_driver->bios_limit) {
 +              ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
 +              if (ret)
 +                      goto err_out_kobj_put;
 +      }
  
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
        if (!cpu_online(j))
                continue;
                per_cpu(cpufreq_cpu_data, j) = policy;
-               per_cpu(policy_cpu, j) = policy->cpu;
+               per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
        }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
  
@@@ -975,13 -949,10 +975,13 @@@ err_out_kobj_put
  static int cpufreq_add_dev(struct sys_device *sys_dev)
  {
        unsigned int cpu = sys_dev->id;
 -      int ret = 0;
 +      int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
 +#ifdef CONFIG_HOTPLUG_CPU
 +      int sibling;
 +#endif
  
        if (cpu_is_offline(cpu))
                return 0;
        cpumask_copy(policy->cpus, cpumask_of(cpu));
  
        /* Initially set CPU itself as the policy_cpu */
-       per_cpu(policy_cpu, cpu) = cpu;
+       per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);
  
        INIT_WORK(&policy->update, handle_update);
  
        /* Set governor before ->init, so that driver could check it */
 -      policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 +#ifdef CONFIG_HOTPLUG_CPU
 +      for_each_online_cpu(sibling) {
 +              struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
 +              if (cp && cp->governor &&
 +                  (cpumask_test_cpu(cpu, cp->related_cpus))) {
 +                      policy->governor = cp->governor;
 +                      found = 1;
 +                      break;
 +              }
 +      }
 +#endif
 +      if (!found)
 +              policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
@@@ -1152,8 -1111,7 +1152,8 @@@ static int __cpufreq_remove_dev(struct 
  #ifdef CONFIG_SMP
  
  #ifdef CONFIG_HOTPLUG_CPU
 -      per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
 +      strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
 +                      CPUFREQ_NAME_LEN);
  #endif
  
        /* if we have other CPUs still registered, we need to unlink them,
                                continue;
                        dprintk("removing link for cpu %u\n", j);
  #ifdef CONFIG_HOTPLUG_CPU
 -                      per_cpu(cpufreq_cpu_governor, j) = data->governor;
 +                      strncpy(per_cpu(cpufreq_cpu_governor, j),
 +                              data->governor->name, CPUFREQ_NAME_LEN);
  #endif
                        cpu_sys_dev = get_cpu_sysdev(j);
                        sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
@@@ -1649,22 -1606,9 +1649,22 @@@ EXPORT_SYMBOL_GPL(cpufreq_register_gove
  
  void cpufreq_unregister_governor(struct cpufreq_governor *governor)
  {
 +#ifdef CONFIG_HOTPLUG_CPU
 +      int cpu;
 +#endif
 +
        if (!governor)
                return;
  
 +#ifdef CONFIG_HOTPLUG_CPU
 +      for_each_present_cpu(cpu) {
 +              if (cpu_online(cpu))
 +                      continue;
 +              if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
 +                      strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
 +      }
 +#endif
 +
        mutex_lock(&cpufreq_governor_mutex);
        list_del(&governor->governor_list);
        mutex_unlock(&cpufreq_governor_mutex);
@@@ -2002,7 -1946,7 +2002,7 @@@ static int __init cpufreq_core_init(voi
        int cpu;
  
        for_each_possible_cpu(cpu) {
-               per_cpu(policy_cpu, cpu) = -1;
+               per_cpu(cpufreq_policy_cpu, cpu) = -1;
                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
        }
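
The cpufreq hunks replace the remembered governor pointer with the governor's name: a departing CPU's governor is stored as a string in a per-cpu char array and looked up again via __find_governor() when the CPU returns, and the unregister path above wipes any saved name so nothing stale survives a governor going away. DEFINE_PER_CPU takes the array type directly; a sketch with made-up names (EXAMPLE_NAME_LEN, example_saved_governor):

#include <linux/percpu.h>
#include <linux/string.h>

#define EXAMPLE_NAME_LEN 16

/* per-cpu character array: the array type is the first macro argument */
static DEFINE_PER_CPU(char[EXAMPLE_NAME_LEN], example_saved_governor);

static void example_save_governor(unsigned int cpu, const char *name)
{
        strncpy(per_cpu(example_saved_governor, cpu), name, EXAMPLE_NAME_LEN);
}
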
  
diff --combined drivers/crypto/padlock-aes.c
index 84c51e17726966c196ac53ce05671e972f65b98a,721d004a0235247d4b974adbfe8d90bdd4ea43f2..8c2f3703ec855f27a6863cb964c5d58a9d01dfa2
@@@ -64,7 -64,7 +64,7 @@@ struct aes_ctx 
        u32 *D;
  };
  
- static DEFINE_PER_CPU(struct cword *, last_cword);
+ static DEFINE_PER_CPU(struct cword *, paes_last_cword);
  
  /* Tells whether the ACE is capable to generate
     the extended key for a given key_len. */
@@@ -152,9 -152,9 +152,9 @@@ static int aes_set_key(struct crypto_tf
  
  ok:
        for_each_online_cpu(cpu)
-               if (&ctx->cword.encrypt == per_cpu(last_cword, cpu) ||
-                   &ctx->cword.decrypt == per_cpu(last_cword, cpu))
-                       per_cpu(last_cword, cpu) = NULL;
+               if (&ctx->cword.encrypt == per_cpu(paes_last_cword, cpu) ||
+                   &ctx->cword.decrypt == per_cpu(paes_last_cword, cpu))
+                       per_cpu(paes_last_cword, cpu) = NULL;
  
        return 0;
  }
@@@ -166,7 -166,7 +166,7 @@@ static inline void padlock_reset_key(st
  {
        int cpu = raw_smp_processor_id();
  
-       if (cword != per_cpu(last_cword, cpu))
+       if (cword != per_cpu(paes_last_cword, cpu))
  #ifndef CONFIG_X86_64
                asm volatile ("pushfl; popfl");
  #else
  
  static inline void padlock_store_cword(struct cword *cword)
  {
-       per_cpu(last_cword, raw_smp_processor_id()) = cword;
+       per_cpu(paes_last_cword, raw_smp_processor_id()) = cword;
  }
  
  /*
@@@ -236,7 -236,7 +236,7 @@@ static inline void ecb_crypt(const u8 *
        /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
         * We could avoid some copying here but it's probably not worth it.
         */
 -      if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
 +      if (unlikely(((unsigned long)in & ~PAGE_MASK) + ecb_fetch_bytes > PAGE_SIZE)) {
                ecb_crypt_copy(in, out, key, cword, count);
                return;
        }
@@@ -248,7 -248,7 +248,7 @@@ static inline u8 *cbc_crypt(const u8 *i
                            u8 *iv, struct cword *cword, int count)
  {
        /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
 -      if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
 +      if (unlikely(((unsigned long)in & ~PAGE_MASK) + cbc_fetch_bytes > PAGE_SIZE))
                return cbc_crypt_copy(in, out, key, iv, cword, count);
  
        return rep_xcrypt_cbc(in, out, key, iv, cword, count);
diff --combined drivers/dma/dmaengine.c
index 8f99354082ceaa169f7ac081594bc83b0c003478,51d7480d3a92d537abe018d44b830f2318bc5bdf..6f51a0a7a8bbdbca798f53293516e178ead5f4d2
@@@ -326,14 -326,7 +326,7 @@@ arch_initcall(dma_channel_table_init)
   */
  struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
  {
-       struct dma_chan *chan;
-       int cpu;
-       cpu = get_cpu();
-       chan = per_cpu_ptr(channel_table[tx_type], cpu)->chan;
-       put_cpu();
-       return chan;
+       return this_cpu_read(channel_table[tx_type]->chan);
  }
  EXPORT_SYMBOL(dma_find_channel);
  
@@@ -632,21 -625,11 +625,21 @@@ static bool device_has_all_tx_types(str
        #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
        if (!dma_has_cap(DMA_XOR, device->cap_mask))
                return false;
 +
 +      #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
 +      if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
 +              return false;
 +      #endif
        #endif
  
        #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
        if (!dma_has_cap(DMA_PQ, device->cap_mask))
                return false;
 +
 +      #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
 +      if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
 +              return false;
 +      #endif
        #endif
  
        return true;
@@@ -857,7 -840,6 +850,6 @@@ dma_async_memcpy_buf_to_buf(struct dma_
        struct dma_async_tx_descriptor *tx;
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
-       int cpu;
        unsigned long flags;
  
        dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
        tx->callback = NULL;
        cookie = tx->tx_submit(tx);
  
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
  
        return cookie;
  }
@@@ -906,7 -888,6 +898,6 @@@ dma_async_memcpy_buf_to_pg(struct dma_c
        struct dma_async_tx_descriptor *tx;
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
-       int cpu;
        unsigned long flags;
  
        dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
        tx->callback = NULL;
        cookie = tx->tx_submit(tx);
  
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
  
        return cookie;
  }
@@@ -955,7 -936,6 +946,6 @@@ dma_async_memcpy_pg_to_pg(struct dma_ch
        struct dma_async_tx_descriptor *tx;
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
-       int cpu;
        unsigned long flags;
  
        dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
        tx->callback = NULL;
        cookie = tx->tx_submit(tx);
  
-       cpu = get_cpu();
-       per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
-       per_cpu_ptr(chan->local, cpu)->memcpy_count++;
-       put_cpu();
+       preempt_disable();
+       __this_cpu_add(chan->local->bytes_transferred, len);
+       __this_cpu_inc(chan->local->memcpy_count);
+       preempt_enable();
  
        return cookie;
  }
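
dmaengine shows two flavours of the new interface: dma_find_channel() collapses get_cpu()/per_cpu_ptr()/put_cpu() into a single this_cpu_read(), and the memcpy accounting uses __this_cpu_add()/__this_cpu_inc() under an explicit preempt_disable() so that both counters land on the same CPU. A sketch of the accounting half, with a hypothetical stats layout assumed to come from alloc_percpu():

#include <linux/percpu.h>
#include <linux/types.h>

struct example_chan_stats {
        unsigned long bytes;
        unsigned long ops;
};

static struct example_chan_stats *example_stats;  /* from alloc_percpu() */

static void example_account(size_t len)
{
        preempt_disable();  /* keep both updates on one CPU */
        __this_cpu_add(example_stats->bytes, len);
        __this_cpu_inc(example_stats->ops);
        preempt_enable();
}
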
diff --combined drivers/net/loopback.c
index eae4ad749e9d87956facbfadd53036d5d882ce03,8ebeb76a373d862c8d157017917990bf626e11dd..b9fcc9819837e7f79530ad11782272ea0f81a344
@@@ -81,7 -81,7 +81,7 @@@ static netdev_tx_t loopback_xmit(struc
  
        /* it's OK to use per_cpu_ptr() because BHs are off */
        pcpu_lstats = dev->ml_priv;
-       lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
+       lb_stats = this_cpu_ptr(pcpu_lstats);
  
        len = skb->len;
        if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
@@@ -207,12 -207,20 +207,12 @@@ static __net_init int loopback_net_init
  out_free_netdev:
        free_netdev(dev);
  out:
 -      if (net == &init_net)
 +      if (net_eq(net, &init_net))
                panic("loopback: Failed to register netdevice: %d\n", err);
        return err;
  }
  
 -static __net_exit void loopback_net_exit(struct net *net)
 -{
 -      struct net_device *dev = net->loopback_dev;
 -
 -      unregister_netdev(dev);
 -}
 -
  /* Registered in net/core/dev.c */
  struct pernet_operations __net_initdata loopback_net_ops = {
         .init = loopback_net_init,
 -       .exit = loopback_net_exit,
  };
diff --combined drivers/net/veth.c
index 63099c58a6ddd7a92f616ca196141becbd7ad463,0c4a811242574e9d6432e83db588ff9122f78a0c..3a15de56df9caa31b818139dbcd75e2dc99e5cc8
@@@ -153,25 -153,34 +153,24 @@@ static netdev_tx_t veth_xmit(struct sk_
        struct net_device *rcv = NULL;
        struct veth_priv *priv, *rcv_priv;
        struct veth_net_stats *stats, *rcv_stats;
-       int length, cpu;
+       int length;
  
 -      skb_orphan(skb);
 -
        priv = netdev_priv(dev);
        rcv = priv->peer;
        rcv_priv = netdev_priv(rcv);
  
-       cpu = smp_processor_id();
-       stats = per_cpu_ptr(priv->stats, cpu);
-       rcv_stats = per_cpu_ptr(rcv_priv->stats, cpu);
+       stats = this_cpu_ptr(priv->stats);
+       rcv_stats = this_cpu_ptr(rcv_priv->stats);
  
        if (!(rcv->flags & IFF_UP))
                goto tx_drop;
  
 -      if (skb->len > (rcv->mtu + MTU_PAD))
 -              goto rx_drop;
 -
 -        skb->tstamp.tv64 = 0;
 -      skb->pkt_type = PACKET_HOST;
 -      skb->protocol = eth_type_trans(skb, rcv);
        if (dev->features & NETIF_F_NO_CSUM)
                skb->ip_summed = rcv_priv->ip_summed;
  
 -      skb->mark = 0;
 -      secpath_reset(skb);
 -      nf_reset(skb);
 -
 -      length = skb->len;
 +      length = skb->len + ETH_HLEN;
 +      if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
 +              goto rx_drop;
  
        stats->tx_bytes += length;
        stats->tx_packets++;
        rcv_stats->rx_bytes += length;
        rcv_stats->rx_packets++;
  
 -      netif_rx(skb);
        return NETDEV_TX_OK;
  
  tx_drop:
@@@ -199,29 -209,32 +198,29 @@@ rx_drop
  static struct net_device_stats *veth_get_stats(struct net_device *dev)
  {
        struct veth_priv *priv;
 -      struct net_device_stats *dev_stats;
        int cpu;
 -      struct veth_net_stats *stats;
 +      struct veth_net_stats *stats, total = {0};
  
        priv = netdev_priv(dev);
 -      dev_stats = &dev->stats;
  
 -      dev_stats->rx_packets = 0;
 -      dev_stats->tx_packets = 0;
 -      dev_stats->rx_bytes = 0;
 -      dev_stats->tx_bytes = 0;
 -      dev_stats->tx_dropped = 0;
 -      dev_stats->rx_dropped = 0;
 -
 -      for_each_online_cpu(cpu) {
 +      for_each_possible_cpu(cpu) {
                stats = per_cpu_ptr(priv->stats, cpu);
  
 -              dev_stats->rx_packets += stats->rx_packets;
 -              dev_stats->tx_packets += stats->tx_packets;
 -              dev_stats->rx_bytes += stats->rx_bytes;
 -              dev_stats->tx_bytes += stats->tx_bytes;
 -              dev_stats->tx_dropped += stats->tx_dropped;
 -              dev_stats->rx_dropped += stats->rx_dropped;
 +              total.rx_packets += stats->rx_packets;
 +              total.tx_packets += stats->tx_packets;
 +              total.rx_bytes   += stats->rx_bytes;
 +              total.tx_bytes   += stats->tx_bytes;
 +              total.tx_dropped += stats->tx_dropped;
 +              total.rx_dropped += stats->rx_dropped;
        }
 -
 -      return dev_stats;
 +      dev->stats.rx_packets = total.rx_packets;
 +      dev->stats.tx_packets = total.tx_packets;
 +      dev->stats.rx_bytes   = total.rx_bytes;
 +      dev->stats.tx_bytes   = total.tx_bytes;
 +      dev->stats.tx_dropped = total.tx_dropped;
 +      dev->stats.rx_dropped = total.rx_dropped;
 +
 +      return &dev->stats;
  }
  
  static int veth_open(struct net_device *dev)
@@@ -326,7 -339,7 +325,7 @@@ static int veth_validate(struct nlattr 
  
  static struct rtnl_link_ops veth_link_ops;
  
 -static int veth_newlink(struct net_device *dev,
 +static int veth_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[])
  {
        int err;
        struct veth_priv *priv;
        char ifname[IFNAMSIZ];
        struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
 +      struct net *net;
  
        /*
         * create and register peer first
        else
                snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
  
 -      peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp);
 -      if (IS_ERR(peer))
 +      net = rtnl_link_get_net(src_net, tbp);
 +      if (IS_ERR(net))
 +              return PTR_ERR(net);
 +
 +      peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp);
 +      if (IS_ERR(peer)) {
 +              put_net(net);
                return PTR_ERR(peer);
 +      }
  
        if (tbp[IFLA_ADDRESS] == NULL)
                random_ether_addr(peer->dev_addr);
  
        err = register_netdevice(peer);
 +      put_net(net);
 +      net = NULL;
        if (err < 0)
                goto err_register_peer;
  
@@@ -437,7 -441,7 +436,7 @@@ err_register_peer
        return err;
  }
  
 -static void veth_dellink(struct net_device *dev)
 +static void veth_dellink(struct net_device *dev, struct list_head *head)
  {
        struct veth_priv *priv;
        struct net_device *peer;
        priv = netdev_priv(dev);
        peer = priv->peer;
  
 -      unregister_netdevice(dev);
 -      unregister_netdevice(peer);
 +      unregister_netdevice_queue(dev, head);
 +      unregister_netdevice_queue(peer, head);
  }
  
  static const struct nla_policy veth_policy[VETH_INFO_MAX + 1];
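
veth_get_stats() above now folds the per-cpu counters into a local total and walks every possible CPU rather than only the online ones, so packets counted on a CPU that has since gone offline still appear in the totals. A sketch of that folding step with a stand-in stats structure and a hypothetical helper name:

#include <linux/netdevice.h>
#include <linux/percpu.h>

struct example_net_stats {        /* stand-in for the driver's veth_net_stats */
        unsigned long rx_packets;
        unsigned long tx_packets;
};

static void example_fold_stats(struct net_device *dev,
                               struct example_net_stats *pcpu_stats)
{
        struct example_net_stats total = {0};
        int cpu;

        for_each_possible_cpu(cpu) {  /* offline CPUs keep their counts */
                struct example_net_stats *s = per_cpu_ptr(pcpu_stats, cpu);

                total.rx_packets += s->rx_packets;
                total.tx_packets += s->tx_packets;
        }
        dev->stats.rx_packets = total.rx_packets;
        dev->stats.tx_packets = total.tx_packets;
}
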
diff --combined drivers/s390/net/netiucv.c
index 395c04c2b00fcd5cca85a696bccddf6f540b7064,14e61441ba0b7279740e8e1a5b6f156c10560565..98c04cac43c1d87467ee60f43807c0dbc654409b
@@@ -113,11 -113,9 +113,9 @@@ static inline int iucv_dbf_passes(debug
  #define IUCV_DBF_TEXT_(name, level, text...) \
        do { \
                if (iucv_dbf_passes(iucv_dbf_##name, level)) { \
-                       char* iucv_dbf_txt_buf = \
-                                       get_cpu_var(iucv_dbf_txt_buf); \
-                       sprintf(iucv_dbf_txt_buf, text); \
-                       debug_text_event(iucv_dbf_##name, level, \
-                                               iucv_dbf_txt_buf); \
+                       char* __buf = get_cpu_var(iucv_dbf_txt_buf); \
+                       sprintf(__buf, text); \
+                       debug_text_event(iucv_dbf_##name, level, __buf); \
                        put_cpu_var(iucv_dbf_txt_buf); \
                } \
        } while (0)
@@@ -741,13 -739,13 +739,13 @@@ static void conn_action_txdone(fsm_inst
        if (single_flag) {
                if ((skb = skb_dequeue(&conn->commit_queue))) {
                        atomic_dec(&skb->users);
 -                      dev_kfree_skb_any(skb);
                        if (privptr) {
                                privptr->stats.tx_packets++;
                                privptr->stats.tx_bytes +=
                                        (skb->len - NETIUCV_HDRLEN
 -                                                - NETIUCV_HDRLEN);
 +                                                - NETIUCV_HDRLEN);
                        }
 +                      dev_kfree_skb_any(skb);
                }
        }
        conn->tx_buff->data = conn->tx_buff->head;
diff --combined fs/ext4/mballoc.c
index c1e19d5b5985f2a36d39801e697b9b45f7283f50,d527fd384582e75f872ad40ba001353bdef3aab3..b1fd3daadc9c50522894fd00ec62bb8e600f9fd0
   * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
   * value of s_mb_order2_reqs can be tuned via
   * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
 - * stripe size (sbi->s_stripe), we try to search for contigous block in
 + * stripe size (sbi->s_stripe), we try to search for contiguous block in
   * stripe size. This should result in better allocation on RAID setups. If
   * not, we search in the specific group using bitmap for best extents. The
   * tunable min_to_scan and max_to_scan control the behaviour here.
@@@ -2529,6 -2529,7 +2529,6 @@@ static void release_blocks_on_commit(jo
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
        struct ext4_free_data *entry;
 -      ext4_fsblk_t discard_block;
        struct list_head *l, *ltmp;
  
        list_for_each_safe(l, ltmp, &txn->t_private_list) {
                        page_cache_release(e4b.bd_bitmap_page);
                }
                ext4_unlock_group(sb, entry->group);
 -              discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
 -                      + entry->start_blk
 -                      + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 -              trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
 -                                        entry->count);
 -              sb_issue_discard(sb, discard_block, entry->count);
 -
 +              if (test_opt(sb, DISCARD)) {
 +                      ext4_fsblk_t discard_block;
 +                      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 +
 +                      discard_block = (ext4_fsblk_t)entry->group *
 +                                              EXT4_BLOCKS_PER_GROUP(sb)
 +                                      + entry->start_blk
 +                                      + le32_to_cpu(es->s_first_data_block);
 +                      trace_ext4_discard_blocks(sb,
 +                                      (unsigned long long)discard_block,
 +                                      entry->count);
 +                      sb_issue_discard(sb, discard_block, entry->count);
 +              }
                kmem_cache_free(ext4_free_ext_cachep, entry);
                ext4_mb_release_desc(&e4b);
        }
@@@ -3010,24 -3005,6 +3010,24 @@@ static void ext4_mb_collect_stats(struc
                trace_ext4_mballoc_prealloc(ac);
  }
  
 +/*
 + * Called on failure; free up any blocks from the inode PA for this
 + * context.  We don't need this for MB_GROUP_PA because we only change
 + * pa_free in ext4_mb_release_context(), but on failure, we've already
 + * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
 + */
 +static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 +{
 +      struct ext4_prealloc_space *pa = ac->ac_pa;
 +      int len;
 +
 +      if (pa && pa->pa_type == MB_INODE_PA) {
 +              len = ac->ac_b_ex.fe_len;
 +              pa->pa_free += len;
 +      }
 +
 +}
 +
  /*
   * use blocks preallocated to inode
   */
@@@ -3955,7 -3932,7 +3955,7 @@@ static void ext4_mb_group_or_file(struc
         * per cpu locality group is to reduce the contention between block
         * request from multiple CPUs.
         */
-       ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
+       ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
  
        /* we're going to use group allocation */
        ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@@ -4313,7 -4290,6 +4313,7 @@@ repeat
                        ac->ac_status = AC_STATUS_CONTINUE;
                        goto repeat;
                } else if (*errp) {
 +                      ext4_discard_allocated_blocks(ac);
                        ac->ac_b_ex.fe_len = 0;
                        ar->len = 0;
                        ext4_mb_show_ac(ac);
@@@ -4446,24 -4422,18 +4446,24 @@@ ext4_mb_free_metadata(handle_t *handle
        return 0;
  }
  
 -/*
 - * Main entry point into mballoc to free blocks
 +/**
 + * ext4_free_blocks() -- Free given blocks and update quota
 + * @handle:           handle for this transaction
 + * @inode:            inode
 + * @block:            start physical block to free
 + * @count:            number of blocks to count
 + * @metadata:                 Are these metadata blocks
   */
 -void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 -                      ext4_fsblk_t block, unsigned long count,
 -                      int metadata, unsigned long *freed)
 +void ext4_free_blocks(handle_t *handle, struct inode *inode,
 +                    struct buffer_head *bh, ext4_fsblk_t block,
 +                    unsigned long count, int flags)
  {
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        struct ext4_super_block *es;
 +      unsigned long freed = 0;
        unsigned int overflow;
        ext4_grpblk_t bit;
        struct buffer_head *gd_bh;
        int err = 0;
        int ret;
  
 -      *freed = 0;
 +      if (bh) {
 +              if (block)
 +                      BUG_ON(block != bh->b_blocknr);
 +              else
 +                      block = bh->b_blocknr;
 +      }
  
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
 -      if (block < le32_to_cpu(es->s_first_data_block) ||
 -          block + count < block ||
 -          block + count > ext4_blocks_count(es)) {
 +      if (!ext4_data_block_valid(sbi, block, count)) {
                ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %llu, count = %lu", block, count);
        }
  
        ext4_debug("freeing block %llu\n", block);
 -      trace_ext4_free_blocks(inode, block, count, metadata);
 +      trace_ext4_free_blocks(inode, block, count, flags);
 +
 +      if (flags & EXT4_FREE_BLOCKS_FORGET) {
 +              struct buffer_head *tbh = bh;
 +              int i;
 +
 +              BUG_ON(bh && (count > 1));
 +
 +              for (i = 0; i < count; i++) {
 +                      if (!bh)
 +                              tbh = sb_find_get_block(inode->i_sb,
 +                                                      block + i);
 +                      ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
 +                                  inode, tbh, block + i);
 +              }
 +      }
 +
 +      /* 
 +       * We need to make sure we don't reuse the freed block until
 +       * after the transaction is committed, which we can do by
 +       * treating the block as metadata, below.  We make an
 +       * exception if the inode is to be written in writeback mode
 +       * since writeback mode has weak data consistency guarantees.
 +       */
 +      if (!ext4_should_writeback_data(inode))
 +              flags |= EXT4_FREE_BLOCKS_METADATA;
  
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
@@@ -4591,8 -4533,7 +4591,8 @@@ do_more
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
                goto error_return;
 -      if (metadata && ext4_handle_valid(handle)) {
 +
 +      if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
                struct ext4_free_data *new_entry;
                /*
                 * blocks being freed are metadata. these blocks shouldn't
  
        ext4_mb_release_desc(&e4b);
  
 -      *freed += count;
 +      freed += count;
  
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        }
        sb->s_dirt = 1;
  error_return:
 +      if (freed)
 +              vfs_dq_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        if (ac)
diff --combined fs/xfs/xfs_mount.c
index 66a888a9ad6f87a0f913bc6b1508d0f8897927fb,ccafe8ef7ad5cab12f78c7fce06b5dfbacb6cb47..bfffd6334abbfae37d804174241bc1a1c17760d3
@@@ -583,8 -583,8 +583,8 @@@ xfs_readsb(xfs_mount_t *mp, int flags
        sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
        extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
  
 -      bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
 -                              BTOBB(sector_size), extra_flags);
 +      bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
 +                        extra_flags);
        if (!bp || XFS_BUF_ISERROR(bp)) {
                xfs_fs_mount_cmn_err(flags, "SB read failed");
                error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
                XFS_BUF_UNMANAGE(bp);
                xfs_buf_relse(bp);
                sector_size = mp->m_sb.sb_sectsize;
 -              bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
 -                                      BTOBB(sector_size), extra_flags);
 +              bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
 +                                BTOBB(sector_size), extra_flags);
                if (!bp || XFS_BUF_ISERROR(bp)) {
                        xfs_fs_mount_cmn_err(flags, "SB re-read failed");
                        error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@@ -1471,7 -1471,7 +1471,7 @@@ xfs_log_sbcount
        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                return 0;
  
 -      tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
 +      tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
        error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
                                        XFS_DEFAULT_LOG_COUNT);
        if (error) {
@@@ -2123,7 -2123,7 +2123,7 @@@ xfs_icsb_destroy_counters
        mutex_destroy(&mp->m_icsb_mutex);
  }
  
 -STATIC_INLINE void
 +STATIC void
  xfs_icsb_lock_cntr(
        xfs_icsb_cnts_t *icsbp)
  {
        }
  }
  
 -STATIC_INLINE void
 +STATIC void
  xfs_icsb_unlock_cntr(
        xfs_icsb_cnts_t *icsbp)
  {
  }
  
  
 -STATIC_INLINE void
 +STATIC void
  xfs_icsb_lock_all_counters(
        xfs_mount_t     *mp)
  {
        }
  }
  
 -STATIC_INLINE void
 +STATIC void
  xfs_icsb_unlock_all_counters(
        xfs_mount_t     *mp)
  {
@@@ -2389,12 -2389,12 +2389,12 @@@ xfs_icsb_modify_counters
  {
        xfs_icsb_cnts_t *icsbp;
        long long       lcounter;       /* long counter for 64 bit fields */
-       int             cpu, ret = 0;
+       int             ret = 0;
  
        might_sleep();
  again:
-       cpu = get_cpu();
-       icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
+       preempt_disable();
+       icsbp = this_cpu_ptr(mp->m_sb_cnts);
  
        /*
         * if the counter is disabled, go to slow path
                break;
        }
        xfs_icsb_unlock_cntr(icsbp);
-       put_cpu();
+       preempt_enable();
        return 0;
  
  slow_path:
-       put_cpu();
+       preempt_enable();
  
        /*
         * serialise with a mutex so we don't burn lots of cpu on
  
  balance_counter:
        xfs_icsb_unlock_cntr(icsbp);
-       put_cpu();
+       preempt_enable();
  
        /*
         * We may have multiple threads here if multiple per-cpu
diff --combined include/net/neighbour.h
index 0302f31a2fb7e53956f089efac74848fbd15466d,f28403ff7648e286c85d131d1a848f01f1d35489..b0173202cad96f3a8aa55df85d16b2d11a62b608
@@@ -37,7 -37,8 +37,7 @@@
  
  struct neighbour;
  
 -struct neigh_parms
 -{
 +struct neigh_parms {
  #ifdef CONFIG_NET_NS
        struct net *net;
  #endif
@@@ -69,7 -70,8 +69,7 @@@
        int     locktime;
  };
  
 -struct neigh_statistics
 -{
 +struct neigh_statistics {
        unsigned long allocs;           /* number of allocated neighs */
        unsigned long destroys;         /* number of destroyed neighs */
        unsigned long hash_grows;       /* number of hash resizes */
        unsigned long unres_discards;   /* number of unresolved drops */
  };
  
- #define NEIGH_CACHE_STAT_INC(tbl, field)                              \
-       do {                                                            \
-               preempt_disable();                                      \
-               (per_cpu_ptr((tbl)->stats, smp_processor_id())->field)++; \
-               preempt_enable();                                       \
-       } while (0)
+ #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
  
 -struct neighbour
 -{
 +struct neighbour {
        struct neighbour        *next;
        struct neigh_table      *tbl;
        struct neigh_parms      *parms;
        u8                      primary_key[0];
  };
  
 -struct neigh_ops
 -{
 +struct neigh_ops {
        int                     family;
        void                    (*solicit)(struct neighbour *, struct sk_buff*);
        void                    (*error_report)(struct neighbour *, struct sk_buff*);
        int                     (*queue_xmit)(struct sk_buff*);
  };
  
 -struct pneigh_entry
 -{
 +struct pneigh_entry {
        struct pneigh_entry     *next;
  #ifdef CONFIG_NET_NS
        struct net              *net;
   */
  
  
 -struct neigh_table
 -{
 +struct neigh_table {
        struct neigh_table      *next;
        int                     family;
        int                     entry_size;
@@@ -258,7 -259,8 +253,7 @@@ extern int                 neigh_sysctl_register(stru
                                                      struct neigh_parms *p,
                                                      int p_id, int pdev_id,
                                                      char *p_name,
 -                                                    proc_handler *proc_handler,
 -                                                    ctl_handler *strategy);
 +                                                    proc_handler *proc_handler);
  extern void                   neigh_sysctl_unregister(struct neigh_parms *p);
  
  static inline void __neigh_parms_put(struct neigh_parms *parms)
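The NEIGH_CACHE_STAT_INC rewrite above applies the same idea to a statistics bump: this_cpu_inc() is preemption-safe on its own (typically a single instruction on x86), so the open-coded disable/lookup/increment/enable dance collapses to one line. A before/after sketch, assuming a hypothetical table whose stats field comes from alloc_percpu():

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/smp.h>

struct cache_stats {
        unsigned long   allocs;
        unsigned long   destroys;
};

struct cache_table {
        struct cache_stats      *stats;         /* percpu, from alloc_percpu() */
};

/* before: protect the smp_processor_id() lookup by hand */
#define CACHE_STAT_INC_OLD(tbl, field)                                  \
        do {                                                            \
                preempt_disable();                                      \
                (per_cpu_ptr((tbl)->stats, smp_processor_id())->field)++; \
                preempt_enable();                                       \
        } while (0)

/* after: one preemption-safe percpu operation */
#define CACHE_STAT_INC(tbl, field)      this_cpu_inc((tbl)->stats->field)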
diff --combined include/net/netfilter/nf_conntrack.h
index 5cf7270e3ffc3e94c6d80bd79168056e9c1e539f,dde549779e422d113bd7e2025e9c391642326a87..a0904adfb8f7aba343152a5af47aa7e818c3b39d
@@@ -255,9 -255,11 +255,9 @@@ static inline bool nf_ct_kill(struct nf
  }
  
  /* These are for NAT.  Icky. */
 -/* Update TCP window tracking data when NAT mangles the packet */
 -extern void nf_conntrack_tcp_update(const struct sk_buff *skb,
 -                                  unsigned int dataoff,
 -                                  struct nf_conn *ct, int dir,
 -                                  s16 offset);
 +extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 +                             enum ip_conntrack_dir dir,
 +                             u32 seq);
  
  /* Fake conntrack entry for untracked connections */
  extern struct nf_conn nf_conntrack_untracked;
@@@ -293,11 -295,11 +293,11 @@@ extern unsigned int nf_conntrack_htable
  extern unsigned int nf_conntrack_max;
  
  #define NF_CT_STAT_INC(net, count)    \
-       (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+       __this_cpu_inc((net)->ct.stat->count)
  #define NF_CT_STAT_INC_ATOMIC(net, count)             \
  do {                                                  \
        local_bh_disable();                             \
-       per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++;   \
+       __this_cpu_inc((net)->ct.stat->count);          \
        local_bh_enable();                              \
  } while (0)
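The conntrack counters above show the other half of the trade-off: __this_cpu_inc() adds no protection of its own, which is enough on the packet path where the caller's context already excludes the race, while the _ATOMIC variant keeps its local_bh_disable()/local_bh_enable() bracket for process-context callers. A sketch of the pair, modelled on the two macros above with illustrative names:

#include <linux/percpu.h>
#include <linux/bottom_half.h>

struct ct_stats {
        unsigned int    found;
        unsigned int    drop;
};

struct ct_state {
        struct ct_stats *stat;          /* percpu, from alloc_percpu() */
};

/* fast path: caller context already excludes the racing softirq */
#define CT_STAT_INC(st, count)          __this_cpu_inc((st)->stat->count)

/* process context: keep the softirq path from interleaving */
#define CT_STAT_INC_ATOMIC(st, count)                   \
        do {                                            \
                local_bh_disable();                     \
                __this_cpu_inc((st)->stat->count);      \
                local_bh_enable();                      \
        } while (0)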
  
diff --combined kernel/lockdep.c
index 4f8df01dbe51ad05e1957fa7e1eecd20af7e0765,8631320a50d0fea1969968743f4a5c5d6b135121..429540c70d3f497ae7fc58998b0f248817a2cbf9
@@@ -49,7 -49,7 +49,7 @@@
  #include "lockdep_internals.h"
  
  #define CREATE_TRACE_POINTS
 -#include <trace/events/lockdep.h>
 +#include <trace/events/lock.h>
  
  #ifdef CONFIG_PROVE_LOCKING
  int prove_locking = 1;
@@@ -140,13 -140,9 +140,14 @@@ static inline struct lock_class *hlock_
  }
  
  #ifdef CONFIG_LOCK_STAT
- static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
+                     cpu_lock_stats);
  
 +static inline u64 lockstat_clock(void)
 +{
 +      return cpu_clock(smp_processor_id());
 +}
 +
  static int lock_point(unsigned long points[], unsigned long ip)
  {
        int i;
        return i;
  }
  
 -static void lock_time_inc(struct lock_time *lt, s64 time)
 +static void lock_time_inc(struct lock_time *lt, u64 time)
  {
        if (time > lt->max)
                lt->max = time;
  
 -      if (time < lt->min || !lt->min)
 +      if (time < lt->min || !lt->nr)
                lt->min = time;
  
        lt->total += time;
  
  static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
  {
 -      dst->min += src->min;
 -      dst->max += src->max;
 +      if (!src->nr)
 +              return;
 +
 +      if (src->max > dst->max)
 +              dst->max = src->max;
 +
 +      if (src->min < dst->min || !dst->nr)
 +              dst->min = src->min;
 +
        dst->total += src->total;
        dst->nr += src->nr;
  }
@@@ -198,7 -187,7 +199,7 @@@ struct lock_class_stats lock_stats(stru
        memset(&stats, 0, sizeof(struct lock_class_stats));
        for_each_possible_cpu(cpu) {
                struct lock_class_stats *pcs =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
  
                for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                        stats.contention_point[i] += pcs->contention_point[i];
@@@ -225,7 -214,7 +226,7 @@@ void clear_lock_stats(struct lock_clas
  
        for_each_possible_cpu(cpu) {
                struct lock_class_stats *cpu_stats =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
  
                memset(cpu_stats, 0, sizeof(struct lock_class_stats));
        }
  
  static struct lock_class_stats *get_lock_stats(struct lock_class *class)
  {
-       return &get_cpu_var(lock_stats)[class - lock_classes];
+       return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
  }
  
  static void put_lock_stats(struct lock_class_stats *stats)
  {
-       put_cpu_var(lock_stats);
+       put_cpu_var(cpu_lock_stats);
  }
  
  static void lock_release_holdtime(struct held_lock *hlock)
  {
        struct lock_class_stats *stats;
 -      s64 holdtime;
 +      u64 holdtime;
  
        if (!lock_stat)
                return;
  
 -      holdtime = sched_clock() - hlock->holdtime_stamp;
 +      holdtime = lockstat_clock() - hlock->holdtime_stamp;
  
        stats = get_lock_stats(hlock_class(hlock));
        if (hlock->read)
@@@ -386,8 -375,7 +387,8 @@@ static int save_trace(struct stack_trac
         * complete trace that maxes out the entries provided will be reported
         * as incomplete, friggin useless </rant>
         */
 -      if (trace->entries[trace->nr_entries-1] == ULONG_MAX)
 +      if (trace->nr_entries != 0 &&
 +          trace->entries[trace->nr_entries-1] == ULONG_MAX)
                trace->nr_entries--;
  
        trace->max_entries = trace->nr_entries;
@@@ -2805,7 -2793,7 +2806,7 @@@ static int __lock_acquire(struct lockde
        hlock->references = references;
  #ifdef CONFIG_LOCK_STAT
        hlock->waittime_stamp = 0;
 -      hlock->holdtime_stamp = sched_clock();
 +      hlock->holdtime_stamp = lockstat_clock();
  #endif
  
        if (check == 2 && !mark_irqflags(curr, hlock))
@@@ -3335,7 -3323,7 +3336,7 @@@ found_it
        if (hlock->instance != lock)
                return;
  
 -      hlock->waittime_stamp = sched_clock();
 +      hlock->waittime_stamp = lockstat_clock();
  
        contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
        contending_point = lock_point(hlock_class(hlock)->contending_point,
@@@ -3358,7 -3346,8 +3359,7 @@@ __lock_acquired(struct lockdep_map *loc
        struct held_lock *hlock, *prev_hlock;
        struct lock_class_stats *stats;
        unsigned int depth;
 -      u64 now;
 -      s64 waittime = 0;
 +      u64 now, waittime = 0;
        int i, cpu;
  
        depth = curr->lockdep_depth;
@@@ -3386,7 -3375,7 +3387,7 @@@ found_it
  
        cpu = smp_processor_id();
        if (hlock->waittime_stamp) {
 -              now = sched_clock();
 +              now = lockstat_clock();
                waittime = now - hlock->waittime_stamp;
                hlock->holdtime_stamp = now;
        }
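Two separate changes meet in kernel/lockdep.c: the percpu array is renamed from lock_stats to cpu_lock_stats so it no longer shares a name with the lock_stats() function visible in the hunks above, and lock_time_add() stops summing per-CPU minima and maxima (which only inflated them) and merges them properly. A sketch of the corrected merge, assuming the field layout the hunks show:

#include <linux/types.h>

struct lock_time {
        s64             min;
        s64             max;
        s64             total;
        unsigned long   nr;
};

/*
 * e.g. CPU0 {min 2, max 10}, CPU1 {min 3, max 30}: the old code
 * reported min 5, max 40; the merged result is min 2, max 30.
 */
static void lock_time_add(struct lock_time *src, struct lock_time *dst)
{
        if (!src->nr)                           /* that CPU saw no events */
                return;

        if (src->max > dst->max)                /* max of the maxima */
                dst->max = src->max;

        if (src->min < dst->min || !dst->nr)    /* min of the minima */
                dst->min = src->min;

        dst->total += src->total;               /* totals and counts do sum */
        dst->nr += src->nr;
}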
diff --combined kernel/module.c
index 5842a71cf0527163c960cc8c41d549a5033e3443,64787cddeb5ee34c0ffb891e711e89d4f5b71b79..12afc5a3ddd3a73972604050695ff4ab4ef708f6
@@@ -370,8 -370,6 +370,6 @@@ EXPORT_SYMBOL_GPL(find_module)
  
  #ifdef CONFIG_SMP
  
- #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
  static void *percpu_modalloc(unsigned long size, unsigned long align,
                             const char *name)
  {
@@@ -395,154 -393,6 +393,6 @@@ static void percpu_modfree(void *freeme
        free_percpu(freeme);
  }
  
- #else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
- /* Number of blocks used and allocated. */
- static unsigned int pcpu_num_used, pcpu_num_allocated;
- /* Size of each block.  -ve means used. */
- static int *pcpu_size;
- static int split_block(unsigned int i, unsigned short size)
- {
-       /* Reallocation required? */
-       if (pcpu_num_used + 1 > pcpu_num_allocated) {
-               int *new;
-               new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
-                              GFP_KERNEL);
-               if (!new)
-                       return 0;
-               pcpu_num_allocated *= 2;
-               pcpu_size = new;
-       }
-       /* Insert a new subblock */
-       memmove(&pcpu_size[i+1], &pcpu_size[i],
-               sizeof(pcpu_size[0]) * (pcpu_num_used - i));
-       pcpu_num_used++;
-       pcpu_size[i+1] -= size;
-       pcpu_size[i] = size;
-       return 1;
- }
- static inline unsigned int block_size(int val)
- {
-       if (val < 0)
-               return -val;
-       return val;
- }
- static void *percpu_modalloc(unsigned long size, unsigned long align,
-                            const char *name)
- {
-       unsigned long extra;
-       unsigned int i;
-       void *ptr;
-       int cpu;
-       if (align > PAGE_SIZE) {
-               printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
-                      name, align, PAGE_SIZE);
-               align = PAGE_SIZE;
-       }
-       ptr = __per_cpu_start;
-       for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-               /* Extra for alignment requirement. */
-               extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
-               BUG_ON(i == 0 && extra != 0);
-               if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
-                       continue;
-               /* Transfer extra to previous block. */
-               if (pcpu_size[i-1] < 0)
-                       pcpu_size[i-1] -= extra;
-               else
-                       pcpu_size[i-1] += extra;
-               pcpu_size[i] -= extra;
-               ptr += extra;
-               /* Split block if warranted */
-               if (pcpu_size[i] - size > sizeof(unsigned long))
-                       if (!split_block(i, size))
-                               return NULL;
-               /* add the per-cpu scanning areas */
-               for_each_possible_cpu(cpu)
-                       kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
-                                      GFP_KERNEL);
-               /* Mark allocated */
-               pcpu_size[i] = -pcpu_size[i];
-               return ptr;
-       }
-       printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-              size);
-       return NULL;
- }
- static void percpu_modfree(void *freeme)
- {
-       unsigned int i;
-       void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
-       int cpu;
-       /* First entry is core kernel percpu data. */
-       for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-               if (ptr == freeme) {
-                       pcpu_size[i] = -pcpu_size[i];
-                       goto free;
-               }
-       }
-       BUG();
-  free:
-       /* remove the per-cpu scanning areas */
-       for_each_possible_cpu(cpu)
-               kmemleak_free(freeme + per_cpu_offset(cpu));
-       /* Merge with previous? */
-       if (pcpu_size[i-1] >= 0) {
-               pcpu_size[i-1] += pcpu_size[i];
-               pcpu_num_used--;
-               memmove(&pcpu_size[i], &pcpu_size[i+1],
-                       (pcpu_num_used - i) * sizeof(pcpu_size[0]));
-               i--;
-       }
-       /* Merge with next? */
-       if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
-               pcpu_size[i] += pcpu_size[i+1];
-               pcpu_num_used--;
-               memmove(&pcpu_size[i+1], &pcpu_size[i+2],
-                       (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
-       }
- }
- static int percpu_modinit(void)
- {
-       pcpu_num_used = 2;
-       pcpu_num_allocated = 2;
-       pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
-                           GFP_KERNEL);
-       /* Static in-kernel percpu data (used). */
-       pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
-       /* Free room. */
-       pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
-       if (pcpu_size[1] < 0) {
-               printk(KERN_ERR "No per-cpu room for modules.\n");
-               pcpu_num_used = 1;
-       }
-       return 0;
- }
- __initcall(percpu_modinit);
- #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
  static unsigned int find_pcpusec(Elf_Ehdr *hdr,
                                 Elf_Shdr *sechdrs,
                                 const char *secstrings)
@@@ -1187,8 -1037,7 +1037,8 @@@ static void add_sect_attrs(struct modul
  
        /* Count loaded sections and allocate structures */
        for (i = 0; i < nsect; i++)
 -              if (sechdrs[i].sh_flags & SHF_ALLOC)
 +              if (sechdrs[i].sh_flags & SHF_ALLOC
 +                  && sechdrs[i].sh_size)
                        nloaded++;
        size[0] = ALIGN(sizeof(*sect_attrs)
                        + nloaded * sizeof(sect_attrs->attrs[0]),
        for (i = 0; i < nsect; i++) {
                if (! (sechdrs[i].sh_flags & SHF_ALLOC))
                        continue;
 +              if (!sechdrs[i].sh_size)
 +                      continue;
                sattr->address = sechdrs[i].sh_addr;
                sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
                                        GFP_KERNEL);
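The large deletion above is the legacy carve-out allocator that handed modules slices of the static percpu area when CONFIG_HAVE_LEGACY_PER_CPU_AREA was set; with every architecture on the unified percpu allocator, only the plain pair survives. Its bodies fall mostly outside this excerpt, so the following is only a sketch of what they plausibly reduce to (the __alloc_reserved_percpu() call is an assumption; only the free_percpu() side is visible above):

#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static void *percpu_modalloc(unsigned long size, unsigned long align,
                             const char *name)
{
        if (align > PAGE_SIZE) {
                printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
                       name, align, PAGE_SIZE);
                align = PAGE_SIZE;
        }
        /* modules draw from the reserved first chunk (assumed entry point) */
        return __alloc_reserved_percpu(size, align);
}

static void percpu_modfree(void *freeme)
{
        free_percpu(freeme);
}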
diff --combined kernel/rcutorture.c
index a621a67ef4e3bd5e4aa97522cdff8b443db15e32,178967b6434e8e27061ac7b226b56c669de7e607..9bb52177af02a3e20aa347e3b65c0a236caa1922
@@@ -327,11 -327,6 +327,11 @@@ rcu_torture_cb(struct rcu_head *p
                cur_ops->deferred_free(rp);
  }
  
 +static int rcu_no_completed(void)
 +{
 +      return 0;
 +}
 +
  static void rcu_torture_deferred_free(struct rcu_torture *p)
  {
        call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@@ -393,21 -388,6 +393,21 @@@ static struct rcu_torture_ops rcu_sync_
        .name           = "rcu_sync"
  };
  
 +static struct rcu_torture_ops rcu_expedited_ops = {
 +      .init           = rcu_sync_torture_init,
 +      .cleanup        = NULL,
 +      .readlock       = rcu_torture_read_lock,
 +      .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
 +      .readunlock     = rcu_torture_read_unlock,
 +      .completed      = rcu_no_completed,
 +      .deferred_free  = rcu_sync_torture_deferred_free,
 +      .sync           = synchronize_rcu_expedited,
 +      .cb_barrier     = NULL,
 +      .stats          = NULL,
 +      .irq_capable    = 1,
 +      .name           = "rcu_expedited"
 +};
 +
  /*
   * Definitions for rcu_bh torture testing.
   */
@@@ -567,25 -547,6 +567,25 @@@ static struct rcu_torture_ops srcu_ops 
        .name           = "srcu"
  };
  
 +static void srcu_torture_synchronize_expedited(void)
 +{
 +      synchronize_srcu_expedited(&srcu_ctl);
 +}
 +
 +static struct rcu_torture_ops srcu_expedited_ops = {
 +      .init           = srcu_torture_init,
 +      .cleanup        = srcu_torture_cleanup,
 +      .readlock       = srcu_torture_read_lock,
 +      .read_delay     = srcu_read_delay,
 +      .readunlock     = srcu_torture_read_unlock,
 +      .completed      = srcu_torture_completed,
 +      .deferred_free  = rcu_sync_torture_deferred_free,
 +      .sync           = srcu_torture_synchronize_expedited,
 +      .cb_barrier     = NULL,
 +      .stats          = srcu_torture_stats,
 +      .name           = "srcu_expedited"
 +};
 +
  /*
   * Definitions for sched torture testing.
   */
@@@ -601,6 -562,11 +601,6 @@@ static void sched_torture_read_unlock(i
        preempt_enable();
  }
  
 -static int sched_torture_completed(void)
 -{
 -      return 0;
 -}
 -
  static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
  {
        call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@@ -617,7 -583,7 +617,7 @@@ static struct rcu_torture_ops sched_op
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
 -      .completed      = sched_torture_completed,
 +      .completed      = rcu_no_completed,
        .deferred_free  = rcu_sched_torture_deferred_free,
        .sync           = sched_torture_synchronize,
        .cb_barrier     = rcu_barrier_sched,
        .name           = "sched"
  };
  
 -static struct rcu_torture_ops sched_ops_sync = {
 +static struct rcu_torture_ops sched_sync_ops = {
        .init           = rcu_sync_torture_init,
        .cleanup        = NULL,
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
 -      .completed      = sched_torture_completed,
 +      .completed      = rcu_no_completed,
        .deferred_free  = rcu_sync_torture_deferred_free,
        .sync           = sched_torture_synchronize,
        .cb_barrier     = NULL,
        .name           = "sched_sync"
  };
  
 -extern int rcu_expedited_torture_stats(char *page);
 -
  static struct rcu_torture_ops sched_expedited_ops = {
        .init           = rcu_sync_torture_init,
        .cleanup        = NULL,
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
 -      .completed      = sched_torture_completed,
 +      .completed      = rcu_no_completed,
        .deferred_free  = rcu_sync_torture_deferred_free,
        .sync           = synchronize_sched_expedited,
        .cb_barrier     = NULL,
@@@ -682,7 -650,7 +682,7 @@@ rcu_torture_writer(void *arg
                old_rp = rcu_torture_current;
                rp->rtort_mbtest = 1;
                rcu_assign_pointer(rcu_torture_current, rp);
 -              smp_wmb();
 +              smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
                if (old_rp) {
                        i = old_rp->rtort_pipe_count;
                        if (i > RCU_TORTURE_PIPE_LEN)
@@@ -763,13 -731,13 +763,13 @@@ static void rcu_torture_timer(unsigned 
                /* Should not happen, but... */
                pipe_count = RCU_TORTURE_PIPE_LEN;
        }
-       ++__get_cpu_var(rcu_torture_count)[pipe_count];
+       __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
        completed = cur_ops->completed() - completed;
        if (completed > RCU_TORTURE_PIPE_LEN) {
                /* Should not happen, but... */
                completed = RCU_TORTURE_PIPE_LEN;
        }
-       ++__get_cpu_var(rcu_torture_batch)[completed];
+       __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
        preempt_enable();
        cur_ops->readunlock(idx);
  }
@@@ -818,13 -786,13 +818,13 @@@ rcu_torture_reader(void *arg
                        /* Should not happen, but... */
                        pipe_count = RCU_TORTURE_PIPE_LEN;
                }
-               ++__get_cpu_var(rcu_torture_count)[pipe_count];
+               __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
                completed = cur_ops->completed() - completed;
                if (completed > RCU_TORTURE_PIPE_LEN) {
                        /* Should not happen, but... */
                        completed = RCU_TORTURE_PIPE_LEN;
                }
-               ++__get_cpu_var(rcu_torture_batch)[completed];
+               __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
                preempt_enable();
                cur_ops->readunlock(idx);
                schedule();
@@@ -1131,10 -1099,9 +1131,10 @@@ rcu_torture_init(void
        int cpu;
        int firsterr = 0;
        static struct rcu_torture_ops *torture_ops[] =
 -              { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
 -                &sched_expedited_ops,
 -                &srcu_ops, &sched_ops, &sched_ops_sync, };
 +              { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
 +                &rcu_bh_ops, &rcu_bh_sync_ops,
 +                &srcu_ops, &srcu_expedited_ops,
 +                &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
  
        mutex_lock(&fullstop_mutex);
  
                        break;
        }
        if (i == ARRAY_SIZE(torture_ops)) {
 -              printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
 +              printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
                       torture_type);
 +              printk(KERN_ALERT "rcu-torture types:");
 +              for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
 +                      printk(KERN_ALERT " %s", torture_ops[i]->name);
 +              printk(KERN_ALERT "\n");
                mutex_unlock(&fullstop_mutex);
                return -EINVAL;
        }
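The rcutorture counters show the static-variable flavour of the conversion: ++__get_cpu_var(x)[i] becomes __this_cpu_inc(per_cpu_var(x)[i]) inside an already preempt-disabled region, per_cpu_var() being the helper of this era that adds the per_cpu__ symbol prefix. A sketch of the pattern with a hypothetical counter array:

#include <linux/percpu.h>
#include <linux/preempt.h>

#define PIPE_LEN        10

static DEFINE_PER_CPU(long [PIPE_LEN + 1], torture_count);

static void record_sample(int bucket)
{
        preempt_disable();
        /* was: ++__get_cpu_var(torture_count)[bucket]; */
        __this_cpu_inc(per_cpu_var(torture_count)[bucket]);
        preempt_enable();
}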
diff --combined kernel/sched.c
index ff39cadf621e91834dcdc9116d123f8912eab225,eecf070ffd1a1deeef02a1588b38c5cbcfe35968..fd05861b2111005a5a88b386d7d77ee036f5e160
@@@ -298,7 -298,7 +298,7 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
  
  #ifdef CONFIG_RT_GROUP_SCHED
  static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
- static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
  #endif /* CONFIG_RT_GROUP_SCHED */
  #else /* !CONFIG_USER_SCHED */
  #define root_task_group init_task_group
   */
  static DEFINE_SPINLOCK(task_group_lock);
  
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +
  #ifdef CONFIG_SMP
  static int root_task_group_empty(void)
  {
  }
  #endif
  
 -#ifdef CONFIG_FAIR_GROUP_SCHED
  #ifdef CONFIG_USER_SCHED
  # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
  #else /* !CONFIG_USER_SCHED */
@@@ -535,12 -534,14 +535,12 @@@ struct rq 
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
  #ifdef CONFIG_NO_HZ
 -      unsigned long last_tick_seen;
        unsigned char in_nohz_recently;
  #endif
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
        u64 nr_switches;
 -      u64 nr_migrations_in;
  
        struct cfs_rq cfs;
        struct rt_rq rt;
  
        u64 rt_avg;
        u64 age_stamp;
 +      u64 idle_stamp;
 +      u64 avg_idle;
  #endif
  
        /* calc_load related fields */
@@@ -677,7 -676,6 +677,7 @@@ inline void update_rq_clock(struct rq *
  
  /**
   * runqueue_is_locked
 + * @cpu: the processor in question.
   *
   * Returns true if the current cpu runqueue is locked.
   * This interface allows printk to be called with the runqueue lock
@@@ -772,7 -770,7 +772,7 @@@ sched_feat_write(struct file *filp, con
        if (!sched_feat_names[i])
                return -EINVAL;
  
 -      filp->f_pos += cnt;
 +      *ppos += cnt;
  
        return cnt;
  }
@@@ -814,7 -812,6 +814,7 @@@ const_debug unsigned int sysctl_sched_n
   * default: 0.25ms
   */
  unsigned int sysctl_sched_shares_ratelimit = 250000;
 +unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
  
  /*
   * Inject some fuzzyness into changing the per-cpu group shares
@@@ -1615,7 -1612,7 +1615,7 @@@ static void update_group_shares_cpu(str
   */
  static int tg_shares_up(struct task_group *tg, void *data)
  {
 -      unsigned long weight, rq_weight = 0, shares = 0;
 +      unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
        unsigned long *usd_rq_weight;
        struct sched_domain *sd = data;
        unsigned long flags;
                weight = tg->cfs_rq[i]->load.weight;
                usd_rq_weight[i] = weight;
  
 +              rq_weight += weight;
                /*
                 * If there are currently no tasks on the cpu pretend there
                 * is one of average load so that when a new task gets to
                if (!weight)
                        weight = NICE_0_LOAD;
  
 -              rq_weight += weight;
 +              sum_weight += weight;
                shares += tg->cfs_rq[i]->shares;
        }
  
 +      if (!rq_weight)
 +              rq_weight = sum_weight;
 +
        if ((!shares && rq_weight) || shares > tg->shares)
                shares = tg->shares;
  
@@@ -1815,22 -1808,6 +1815,22 @@@ static void cfs_rq_set_shares(struct cf
  #endif
  
  static void calc_load_account_active(struct rq *this_rq);
 +static void update_sysctl(void);
 +static int get_update_sysctl_factor(void);
 +
 +static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 +{
 +      set_task_rq(p, cpu);
 +#ifdef CONFIG_SMP
 +      /*
 +       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 +       * successfuly executed on another CPU. We must ensure that updates of
 +       * per-task data have been completed by this moment.
 +       */
 +      smp_wmb();
 +      task_thread_info(p)->cpu = cpu;
 +#endif
 +}
  
  #include "sched_stats.h"
  #include "sched_idletask.c"
@@@ -1988,6 -1965,20 +1988,6 @@@ inline int task_curr(const struct task_
        return cpu_curr(task_cpu(p)) == p;
  }
  
 -static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 -{
 -      set_task_rq(p, cpu);
 -#ifdef CONFIG_SMP
 -      /*
 -       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
 -       * successfuly executed on another CPU. We must ensure that updates of
 -       * per-task data have been completed by this moment.
 -       */
 -      smp_wmb();
 -      task_thread_info(p)->cpu = cpu;
 -#endif
 -}
 -
  static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                       const struct sched_class *prev_class,
                                       int oldprio, int running)
                p->sched_class->prio_changed(rq, p, oldprio, running);
  }
  
 +/**
 + * kthread_bind - bind a just-created kthread to a cpu.
 + * @p: thread created by kthread_create().
 + * @cpu: cpu (might not be online, must be possible) for @k to run on.
 + *
 + * Description: This function is equivalent to set_cpus_allowed(),
 + * except that @cpu doesn't need to be online, and the thread must be
 + * stopped (i.e., just returned from kthread_create()).
 + *
 + * Function lives here instead of kthread.c because it messes with
 + * scheduler internals which require locking.
 + */
 +void kthread_bind(struct task_struct *p, unsigned int cpu)
 +{
 +      struct rq *rq = cpu_rq(cpu);
 +      unsigned long flags;
 +
 +      /* Must have done schedule() in kthread() before we set_task_cpu */
 +      if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
 +              WARN_ON(1);
 +              return;
 +      }
 +
 +      spin_lock_irqsave(&rq->lock, flags);
 +      update_rq_clock(rq);
 +      set_task_cpu(p, cpu);
 +      p->cpus_allowed = cpumask_of_cpu(cpu);
 +      p->rt.nr_cpus_allowed = 1;
 +      p->flags |= PF_THREAD_BOUND;
 +      spin_unlock_irqrestore(&rq->lock, flags);
 +}
 +EXPORT_SYMBOL(kthread_bind);
 +
  #ifdef CONFIG_SMP
  /*
   * Is this task likely cache-hot:
@@@ -2045,7 -2003,7 +2045,7 @@@ task_hot(struct task_struct *p, u64 now
        /*
         * Buddy candidates are cache hot:
         */
 -      if (sched_feat(CACHE_HOT_BUDDY) &&
 +      if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
                        (&p->se == cfs_rq_of(&p->se)->next ||
                         &p->se == cfs_rq_of(&p->se)->last))
                return 1;
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
        int old_cpu = task_cpu(p);
 -      struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
        struct cfs_rq *old_cfsrq = task_cfs_rq(p),
                      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 -      u64 clock_offset;
 -
 -      clock_offset = old_rq->clock - new_rq->clock;
  
        trace_sched_migrate_task(p, new_cpu);
  
 -#ifdef CONFIG_SCHEDSTATS
 -      if (p->se.wait_start)
 -              p->se.wait_start -= clock_offset;
 -      if (p->se.sleep_start)
 -              p->se.sleep_start -= clock_offset;
 -      if (p->se.block_start)
 -              p->se.block_start -= clock_offset;
 -#endif
        if (old_cpu != new_cpu) {
                p->se.nr_migrations++;
 -              new_rq->nr_migrations_in++;
 -#ifdef CONFIG_SCHEDSTATS
 -              if (task_hot(p, old_rq->clock, NULL))
 -                      schedstat_inc(p, se.nr_forced2_migrations);
 -#endif
                perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
                                     1, 1, NULL, 0);
        }
@@@ -2106,7 -2081,6 +2106,7 @@@ migrate_task(struct task_struct *p, in
         * it is sufficient to simply update the task's cpu field.
         */
        if (!p->se.on_rq && !task_running(rq, p)) {
 +              update_rq_clock(rq);
                set_task_cpu(p, dest_cpu);
                return 0;
        }
@@@ -2314,14 -2288,6 +2314,14 @@@ void task_oncpu_function_call(struct ta
        preempt_enable();
  }
  
 +#ifdef CONFIG_SMP
 +static inline
 +int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 +{
 +      return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 +}
 +#endif
 +
  /***
   * try_to_wake_up - wake up a thread
   * @p: the to-be-woken-up thread
@@@ -2341,7 -2307,7 +2341,7 @@@ static int try_to_wake_up(struct task_s
  {
        int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
 -      struct rq *rq;
 +      struct rq *rq, *orig_rq;
  
        if (!sched_feat(SYNC_WAKEUPS))
                wake_flags &= ~WF_SYNC;
        this_cpu = get_cpu();
  
        smp_wmb();
 -      rq = task_rq_lock(p, &flags);
 +      rq = orig_rq = task_rq_lock(p, &flags);
        update_rq_clock(rq);
        if (!(p->state & state))
                goto out;
        if (task_contributes_to_load(p))
                rq->nr_uninterruptible--;
        p->state = TASK_WAKING;
 -      task_rq_unlock(rq, &flags);
 +      __task_rq_unlock(rq);
  
 -      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
 +      cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
        if (cpu != orig_cpu)
                set_task_cpu(p, cpu);
  
 -      rq = task_rq_lock(p, &flags);
 +      rq = __task_rq_lock(p);
 +      update_rq_clock(rq);
 +
        WARN_ON(p->state != TASK_WAKING);
        cpu = task_cpu(p);
  
@@@ -2438,17 -2402,6 +2438,17 @@@ out_running
  #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
                p->sched_class->task_wake_up(rq, p);
 +
 +      if (unlikely(rq->idle_stamp)) {
 +              u64 delta = rq->clock - rq->idle_stamp;
 +              u64 max = 2*sysctl_sched_migration_cost;
 +
 +              if (delta > max)
 +                      rq->avg_idle = max;
 +              else
 +                      update_avg(&rq->avg_idle, delta);
 +              rq->idle_stamp = 0;
 +      }
  #endif
  out:
        task_rq_unlock(rq, &flags);
@@@ -2495,6 -2448,7 +2495,6 @@@ static void __sched_fork(struct task_st
        p->se.avg_overlap               = 0;
        p->se.start_runtime             = 0;
        p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
 -      p->se.avg_running               = 0;
  
  #ifdef CONFIG_SCHEDSTATS
        p->se.wait_start                        = 0;
        p->se.nr_failed_migrations_running      = 0;
        p->se.nr_failed_migrations_hot          = 0;
        p->se.nr_forced_migrations              = 0;
 -      p->se.nr_forced2_migrations             = 0;
  
        p->se.nr_wakeups                        = 0;
        p->se.nr_wakeups_sync                   = 0;
@@@ -2555,18 -2510,23 +2555,18 @@@ void sched_fork(struct task_struct *p, 
  
        __sched_fork(p);
  
 -      /*
 -       * Make sure we do not leak PI boosting priority to the child.
 -       */
 -      p->prio = current->normal_prio;
 -
        /*
         * Revert to default priority/policy on fork if requested.
         */
        if (unlikely(p->sched_reset_on_fork)) {
 -              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
 +              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
                        p->policy = SCHED_NORMAL;
 -
 -              if (p->normal_prio < DEFAULT_PRIO)
 -                      p->prio = DEFAULT_PRIO;
 +                      p->normal_prio = p->static_prio;
 +              }
  
                if (PRIO_TO_NICE(p->static_prio) < 0) {
                        p->static_prio = NICE_TO_PRIO(0);
 +                      p->normal_prio = p->static_prio;
                        set_load_weight(p);
                }
  
                p->sched_reset_on_fork = 0;
        }
  
 +      /*
 +       * Make sure we do not leak PI boosting priority to the child.
 +       */
 +      p->prio = current->normal_prio;
 +
        if (!rt_prio(p->prio))
                p->sched_class = &fair_sched_class;
  
 +      if (p->sched_class->task_fork)
 +              p->sched_class->task_fork(p);
 +
  #ifdef CONFIG_SMP
 -      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
 +      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
  #endif
        set_task_cpu(p, cpu);
  
@@@ -2624,7 -2576,19 +2624,7 @@@ void wake_up_new_task(struct task_struc
        rq = task_rq_lock(p, &flags);
        BUG_ON(p->state != TASK_RUNNING);
        update_rq_clock(rq);
 -
 -      p->prio = effective_prio(p);
 -
 -      if (!p->sched_class->task_new || !current->se.on_rq) {
 -              activate_task(rq, p, 0);
 -      } else {
 -              /*
 -               * Let the scheduling class do new task startup
 -               * management (if any):
 -               */
 -              p->sched_class->task_new(rq, p);
 -              inc_nr_running(rq);
 -      }
 +      activate_task(rq, p, 0);
        trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, WF_FORK);
  #ifdef CONFIG_SMP
@@@ -2848,14 -2812,14 +2848,14 @@@ context_switch(struct rq *rq, struct ta
         */
        arch_start_context_switch(prev);
  
 -      if (unlikely(!mm)) {
 +      if (likely(!mm)) {
                next->active_mm = oldmm;
                atomic_inc(&oldmm->mm_count);
                enter_lazy_tlb(oldmm, next);
        } else
                switch_mm(oldmm, mm, next);
  
 -      if (unlikely(!prev->mm)) {
 +      if (likely(!prev->mm)) {
                prev->active_mm = NULL;
                rq->prev_mm = oldmm;
        }
@@@ -3017,6 -2981,15 +3017,6 @@@ static void calc_load_account_active(st
        }
  }
  
 -/*
 - * Externally visible per-cpu scheduler statistics:
 - * cpu_nr_migrations(cpu) - number of migrations into that cpu
 - */
 -u64 cpu_nr_migrations(int cpu)
 -{
 -      return cpu_rq(cpu)->nr_migrations_in;
 -}
 -
  /*
   * Update rq->cpu_load[] statistics. This function is usually called every
   * scheduler tick (TICK_NSEC).
@@@ -3139,7 -3112,7 +3139,7 @@@ out
  void sched_exec(void)
  {
        int new_cpu, this_cpu = get_cpu();
 -      new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
 +      new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
        put_cpu();
        if (new_cpu != this_cpu)
                sched_migrate_task(current, new_cpu);
@@@ -3155,6 -3128,10 +3155,6 @@@ static void pull_task(struct rq *src_rq
        deactivate_task(src_rq, p, 0);
        set_task_cpu(p, this_cpu);
        activate_task(this_rq, p, 0);
 -      /*
 -       * Note that idle threads have a prio of MAX_PRIO, for this test
 -       * to be always true for them.
 -       */
        check_preempt_curr(this_rq, p, 0);
  }
  
@@@ -3677,7 -3654,6 +3677,7 @@@ static void update_group_power(struct s
  
  /**
   * update_sg_lb_stats - Update sched_group's statistics for load balancing.
 + * @sd: The sched_domain whose statistics are to be updated.
   * @group: sched_group whose statistics are to be updated.
   * @this_cpu: Cpu for which load balance is currently performed.
   * @idle: Idle status of this_cpu
@@@ -4113,7 -4089,7 +4113,7 @@@ static int load_balance(int this_cpu, s
        unsigned long flags;
        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
 -      cpumask_setall(cpus);
 +      cpumask_copy(cpus, cpu_active_mask);
  
        /*
         * When power savings policy is enabled for the parent domain, idle
@@@ -4276,7 -4252,7 +4276,7 @@@ load_balance_newidle(int this_cpu, stru
        int all_pinned = 0;
        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
 -      cpumask_setall(cpus);
 +      cpumask_copy(cpus, cpu_active_mask);
  
        /*
         * When power savings policy is enabled for the parent domain, idle
@@@ -4416,11 -4392,6 +4416,11 @@@ static void idle_balance(int this_cpu, 
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
  
 +      this_rq->idle_stamp = this_rq->clock;
 +
 +      if (this_rq->avg_idle < sysctl_sched_migration_cost)
 +              return;
 +
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
  
                interval = msecs_to_jiffies(sd->balance_interval);
                if (time_after(next_balance, sd->last_balance + interval))
                        next_balance = sd->last_balance + interval;
 -              if (pulled_task)
 +              if (pulled_task) {
 +                      this_rq->idle_stamp = 0;
                        break;
 +              }
        }
        if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
                /*
@@@ -4673,7 -4642,7 +4673,7 @@@ int select_nohz_load_balancer(int stop_
                cpumask_set_cpu(cpu, nohz.cpu_mask);
  
                /* time for ilb owner also to sleep */
 -              if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 +              if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
                        if (atomic_read(&nohz.load_balancer) == cpu)
                                atomic_set(&nohz.load_balancer, -1);
                        return 0;
@@@ -5040,13 -5009,8 +5040,13 @@@ static void account_guest_time(struct t
        p->gtime = cputime_add(p->gtime, cputime);
  
        /* Add guest time to cpustat. */
 -      cpustat->user = cputime64_add(cpustat->user, tmp);
 -      cpustat->guest = cputime64_add(cpustat->guest, tmp);
 +      if (TASK_NICE(p) > 0) {
 +              cpustat->nice = cputime64_add(cpustat->nice, tmp);
 +              cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
 +      } else {
 +              cpustat->user = cputime64_add(cpustat->user, tmp);
 +              cpustat->guest = cputime64_add(cpustat->guest, tmp);
 +      }
  }
  
  /*
@@@ -5161,86 -5125,60 +5161,86 @@@ void account_idle_ticks(unsigned long t
   * Use precise platform statistics if available:
   */
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 -cputime_t task_utime(struct task_struct *p)
 +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
 -      return p->utime;
 +      *ut = p->utime;
 +      *st = p->stime;
  }
  
 -cputime_t task_stime(struct task_struct *p)
 +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
 -      return p->stime;
 +      struct task_cputime cputime;
 +
 +      thread_group_cputime(p, &cputime);
 +
 +      *ut = cputime.utime;
 +      *st = cputime.stime;
  }
  #else
 -cputime_t task_utime(struct task_struct *p)
 +
 +#ifndef nsecs_to_cputime
 +# define nsecs_to_cputime(__nsecs)    nsecs_to_jiffies(__nsecs)
 +#endif
 +
 +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
 -      clock_t utime = cputime_to_clock_t(p->utime),
 -              total = utime + cputime_to_clock_t(p->stime);
 -      u64 temp;
 +      cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
  
        /*
         * Use CFS's precise accounting:
         */
 -      temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
 +      rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
  
        if (total) {
 -              temp *= utime;
 +              u64 temp;
 +
 +              temp = (u64)(rtime * utime);
                do_div(temp, total);
 -      }
 -      utime = (clock_t)temp;
 +              utime = (cputime_t)temp;
 +      } else
 +              utime = rtime;
 +
 +      /*
 +       * Compare with previous values, to keep monotonicity:
 +       */
 +      p->prev_utime = max(p->prev_utime, utime);
 +      p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
  
 -      p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
 -      return p->prev_utime;
 +      *ut = p->prev_utime;
 +      *st = p->prev_stime;
  }
  
 -cputime_t task_stime(struct task_struct *p)
 +/*
 + * Must be called with siglock held.
 + */
 +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
 -      clock_t stime;
 +      struct signal_struct *sig = p->signal;
 +      struct task_cputime cputime;
 +      cputime_t rtime, utime, total;
  
 -      /*
 -       * Use CFS's precise accounting. (we subtract utime from
 -       * the total, to make sure the total observed by userspace
 -       * grows monotonically - apps rely on that):
 -       */
 -      stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
 -                      cputime_to_clock_t(task_utime(p));
 +      thread_group_cputime(p, &cputime);
  
 -      if (stime >= 0)
 -              p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
 +      total = cputime_add(cputime.utime, cputime.stime);
 +      rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
  
 -      return p->prev_stime;
 -}
 -#endif
 +      if (total) {
 +              u64 temp;
  
 -inline cputime_t task_gtime(struct task_struct *p)
 -{
 -      return p->gtime;
 +              temp = (u64)(rtime * cputime.utime);
 +              do_div(temp, total);
 +              utime = (cputime_t)temp;
 +      } else
 +              utime = rtime;
 +
 +      sig->prev_utime = max(sig->prev_utime, utime);
 +      sig->prev_stime = max(sig->prev_stime,
 +                            cputime_sub(rtime, sig->prev_utime));
 +
 +      *ut = sig->prev_utime;
 +      *st = sig->prev_stime;
  }
 +#endif
  
  /*
   * This function gets called by the timer code, with HZ frequency.
@@@ -5375,14 -5313,13 +5375,14 @@@ static inline void schedule_debug(struc
  #endif
  }
  
 -static void put_prev_task(struct rq *rq, struct task_struct *p)
 +static void put_prev_task(struct rq *rq, struct task_struct *prev)
  {
 -      u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
 +      if (prev->state == TASK_RUNNING) {
 +              u64 runtime = prev->se.sum_exec_runtime;
  
 -      update_avg(&p->se.avg_running, runtime);
 +              runtime -= prev->se.prev_sum_exec_runtime;
 +              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
  
 -      if (p->state == TASK_RUNNING) {
                /*
                 * In order to avoid avg_overlap growing stale when we are
                 * indeed overlapping and hence not getting put to sleep, grow
                 * correlates to the amount of cache footprint a task can
                 * build up.
                 */
 -              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
 -              update_avg(&p->se.avg_overlap, runtime);
 -      } else {
 -              update_avg(&p->se.avg_running, 0);
 +              update_avg(&prev->se.avg_overlap, runtime);
        }
 -      p->sched_class->put_prev_task(rq, p);
 +      prev->sched_class->put_prev_task(rq, prev);
  }
  
  /*
@@@ -5504,7 -5444,7 +5504,7 @@@ need_resched_nonpreemptible
  }
  EXPORT_SYMBOL(schedule);
  
 -#ifdef CONFIG_SMP
 +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
  /*
   * Look out! "owner" is an entirely speculative pointer
   * access and not reliable.
@@@ -6198,14 -6138,22 +6198,14 @@@ __setscheduler(struct rq *rq, struct ta
        BUG_ON(p->se.on_rq);
  
        p->policy = policy;
 -      switch (p->policy) {
 -      case SCHED_NORMAL:
 -      case SCHED_BATCH:
 -      case SCHED_IDLE:
 -              p->sched_class = &fair_sched_class;
 -              break;
 -      case SCHED_FIFO:
 -      case SCHED_RR:
 -              p->sched_class = &rt_sched_class;
 -              break;
 -      }
 -
        p->rt_priority = prio;
        p->normal_prio = normal_prio(p);
        /* we are holding p->pi_lock already */
        p->prio = rt_mutex_getprio(p);
 +      if (rt_prio(p->prio))
 +              p->sched_class = &rt_sched_class;
 +      else
 +              p->sched_class = &fair_sched_class;
        set_load_weight(p);
  }
  
@@@ -6608,8 -6556,6 +6608,8 @@@ SYSCALL_DEFINE3(sched_setaffinity, pid_
  long sched_getaffinity(pid_t pid, struct cpumask *mask)
  {
        struct task_struct *p;
 +      unsigned long flags;
 +      struct rq *rq;
        int retval;
  
        get_online_cpus();
        if (retval)
                goto out_unlock;
  
 +      rq = task_rq_lock(p, &flags);
        cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
 +      task_rq_unlock(rq, &flags);
  
  out_unlock:
        read_unlock(&tasklist_lock);
@@@ -6772,6 -6716,9 +6772,6 @@@ EXPORT_SYMBOL(yield)
  /*
   * This task is about to go to sleep on IO. Increment rq->nr_iowait so
   * that process accounting knows that this is a task in IO wait state.
 - *
 - * But don't do that if it is a deliberate, throttling IO wait (this task
 - * has set its backing_dev_info: the queue against which it should throttle)
   */
  void __sched io_schedule(void)
  {
@@@ -6864,8 -6811,6 +6864,8 @@@ SYSCALL_DEFINE2(sched_rr_get_interval, 
  {
        struct task_struct *p;
        unsigned int time_slice;
 +      unsigned long flags;
 +      struct rq *rq;
        int retval;
        struct timespec t;
  
        if (retval)
                goto out_unlock;
  
 -      time_slice = p->sched_class->get_rr_interval(p);
 +      rq = task_rq_lock(p, &flags);
 +      time_slice = p->sched_class->get_rr_interval(rq, p);
 +      task_rq_unlock(rq, &flags);
  
        read_unlock(&tasklist_lock);
        jiffies_to_timespec(time_slice, &t);
@@@ -6958,7 -6901,7 +6958,7 @@@ void show_state_filter(unsigned long st
        /*
         * Only show locks if all tasks are dumped:
         */
 -      if (state_filter == -1)
 +      if (!state_filter)
                debug_show_all_locks();
  }
  
@@@ -6985,6 -6928,7 +6985,6 @@@ void __cpuinit init_idle(struct task_st
        __sched_fork(idle);
        idle->se.exec_start = sched_clock();
  
 -      idle->prio = idle->normal_prio = MAX_PRIO;
        cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
        __set_task_cpu(idle, cpu);
  
@@@ -7025,43 -6969,22 +7025,43 @@@ cpumask_var_t nohz_cpu_mask
   *
   * This idea comes from the SD scheduler of Con Kolivas:
   */
 -static inline void sched_init_granularity(void)
 +static int get_update_sysctl_factor(void)
  {
 -      unsigned int factor = 1 + ilog2(num_online_cpus());
 -      const unsigned long limit = 200000000;
 +      unsigned int cpus = min_t(int, num_online_cpus(), 8);
 +      unsigned int factor;
 +
 +      switch (sysctl_sched_tunable_scaling) {
 +      case SCHED_TUNABLESCALING_NONE:
 +              factor = 1;
 +              break;
 +      case SCHED_TUNABLESCALING_LINEAR:
 +              factor = cpus;
 +              break;
 +      case SCHED_TUNABLESCALING_LOG:
 +      default:
 +              factor = 1 + ilog2(cpus);
 +              break;
 +      }
  
 -      sysctl_sched_min_granularity *= factor;
 -      if (sysctl_sched_min_granularity > limit)
 -              sysctl_sched_min_granularity = limit;
 +      return factor;
 +}
  
 -      sysctl_sched_latency *= factor;
 -      if (sysctl_sched_latency > limit)
 -              sysctl_sched_latency = limit;
 +static void update_sysctl(void)
 +{
 +      unsigned int factor = get_update_sysctl_factor();
  
 -      sysctl_sched_wakeup_granularity *= factor;
 +#define SET_SYSCTL(name) \
 +      (sysctl_##name = (factor) * normalized_sysctl_##name)
 +      SET_SYSCTL(sched_min_granularity);
 +      SET_SYSCTL(sched_latency);
 +      SET_SYSCTL(sched_wakeup_granularity);
 +      SET_SYSCTL(sched_shares_ratelimit);
 +#undef SET_SYSCTL
 +}
  
 -      sysctl_sched_shares_ratelimit *= factor;
 +static inline void sched_init_granularity(void)
 +{
 +      update_sysctl();
  }
  
  #ifdef CONFIG_SMP
@@@ -7098,7 -7021,7 +7098,7 @@@ int set_cpus_allowed_ptr(struct task_st
        int ret = 0;
  
        rq = task_rq_lock(p, &flags);
 -      if (!cpumask_intersects(new_mask, cpu_online_mask)) {
 +      if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                ret = -EINVAL;
                goto out;
        }
        if (cpumask_test_cpu(task_cpu(p), new_mask))
                goto out;
  
 -      if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
 +      if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
                /* Need help from migration thread: drop lock and wait. */
                struct task_struct *mt = rq->migration_thread;
  
@@@ -7274,19 -7197,19 +7274,19 @@@ static void move_task_off_dead_cpu(int 
  
  again:
        /* Look for allowed, online CPU in same node. */
 -      for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
 +      for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
                if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                        goto move;
  
        /* Any allowed, online CPU? */
 -      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
 +      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
        if (dest_cpu < nr_cpu_ids)
                goto move;
  
        /* No more Mr. Nice Guy. */
        if (dest_cpu >= nr_cpu_ids) {
                cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
 -              dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
 +              dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
  
                /*
                 * Don't tell them about moving exiting tasks or
@@@ -7315,7 -7238,7 +7315,7 @@@ move
   */
  static void migrate_nr_uninterruptible(struct rq *rq_src)
  {
 -      struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
 +      struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
        unsigned long flags;
  
        local_irq_save(flags);
@@@ -7449,16 -7372,17 +7449,16 @@@ static struct ctl_table sd_ctl_dir[] = 
                .procname       = "sched_domain",
                .mode           = 0555,
        },
 -      {0, },
 +      {}
  };
  
  static struct ctl_table sd_ctl_root[] = {
        {
 -              .ctl_name       = CTL_KERN,
                .procname       = "kernel",
                .mode           = 0555,
                .child          = sd_ctl_dir,
        },
 -      {0, },
 +      {}
  };
  
  static struct ctl_table *sd_alloc_ctl_entry(int n)
@@@ -7568,7 -7492,7 +7568,7 @@@ static ctl_table *sd_alloc_ctl_cpu_tabl
  static struct ctl_table_header *sd_sysctl_header;
  static void register_sched_domain_sysctl(void)
  {
 -      int i, cpu_num = num_online_cpus();
 +      int i, cpu_num = num_possible_cpus();
        struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
        char buf[32];
  
        if (entry == NULL)
                return;
  
 -      for_each_online_cpu(i) {
 +      for_each_possible_cpu(i) {
                snprintf(buf, 32, "cpu%d", i);
                entry->procname = kstrdup(buf, GFP_KERNEL);
                entry->mode = 0555;
@@@ -7708,6 -7632,7 +7708,6 @@@ migration_call(struct notifier_block *n
                spin_lock_irq(&rq->lock);
                update_rq_clock(rq);
                deactivate_task(rq, rq->idle, 0);
 -              rq->idle->static_prio = MAX_PRIO;
                __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                rq->idle->sched_class = &idle_sched_class;
                migrate_dead_tasks(cpu);
@@@ -7781,16 -7706,6 +7781,16 @@@ early_initcall(migration_init)
  
  #ifdef CONFIG_SCHED_DEBUG
  
 +static __read_mostly int sched_domain_debug_enabled;
 +
 +static int __init sched_domain_debug_setup(char *str)
 +{
 +      sched_domain_debug_enabled = 1;
 +
 +      return 0;
 +}
 +early_param("sched_debug", sched_domain_debug_setup);
 +
  static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                  struct cpumask *groupmask)
  {
@@@ -7877,9 -7792,6 +7877,9 @@@ static void sched_domain_debug(struct s
        cpumask_var_t groupmask;
        int level = 0;
  
 +      if (!sched_domain_debug_enabled)
 +              return;
 +
        if (!sd) {
                printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
                return;
@@@ -7959,8 -7871,6 +7959,8 @@@ sd_parent_degenerate(struct sched_domai
  
  static void free_rootdomain(struct root_domain *rd)
  {
 +      synchronize_sched();
 +
        cpupri_cleanup(&rd->cpupri);
  
        free_cpumask_var(rd->rto_mask);
@@@ -8101,7 -8011,6 +8101,7 @@@ static cpumask_var_t cpu_isolated_map
  /* Setup the mask of cpus configured for isolated domains */
  static int __init isolated_cpu_setup(char *str)
  {
 +      alloc_bootmem_cpumask_var(&cpu_isolated_map);
        cpulist_parse(str, cpu_isolated_map);
        return 1;
  }
@@@ -8286,14 -8195,14 +8286,14 @@@ enum s_alloc 
   */
  #ifdef CONFIG_SCHED_SMT
  static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
- static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
+ static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
  
  static int
  cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
                 struct sched_group **sg, struct cpumask *unused)
  {
        if (sg)
-               *sg = &per_cpu(sched_group_cpus, cpu).sg;
+               *sg = &per_cpu(sched_groups, cpu).sg;
        return cpu;
  }
  #endif /* CONFIG_SCHED_SMT */
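The sched_group_cpus to sched_groups rename here, like init_rt_rq to init_rt_rq_var earlier in the file, is purely mechanical: the static percpu variable gets a name that no longer coincides with the helper function of the same name in sched.c, apparently so that percpu symbols can safely share the ordinary global namespace once the per_cpu__ prefix is dropped. The shape of the fix, with hypothetical names:

#include <linux/percpu.h>

struct grp_stats { int weight; };

struct grp_stats *grp_stats(int cpu);                   /* existing helper owns the name */

static DEFINE_PER_CPU(struct grp_stats, cpu_grp_stats); /* percpu copy gets its own */

static int local_weight(void)
{
        /* access sites switch identifiers, nothing else changes */
        int w = get_cpu_var(cpu_grp_stats).weight;      /* disables preemption */

        put_cpu_var(cpu_grp_stats);
        return w;
}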
@@@ -8938,7 -8847,7 +8938,7 @@@ static int build_sched_domains(const st
        return __build_sched_domains(cpu_map, NULL);
  }
  
 -static struct cpumask *doms_cur;      /* current sched domains */
 +static cpumask_var_t *doms_cur;       /* current sched domains */
  static int ndoms_cur;         /* number of sched domains in 'doms_cur' */
  static struct sched_domain_attr *dattr_cur;
                                /* attribues of custom domains in 'doms_cur' */
@@@ -8960,31 -8869,6 +8960,31 @@@ int __attribute__((weak)) arch_update_c
        return 0;
  }
  
 +cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
 +{
 +      int i;
 +      cpumask_var_t *doms;
 +
 +      doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
 +      if (!doms)
 +              return NULL;
 +      for (i = 0; i < ndoms; i++) {
 +              if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
 +                      free_sched_domains(doms, i);
 +                      return NULL;
 +              }
 +      }
 +      return doms;
 +}
 +
 +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
 +{
 +      unsigned int i;
 +      for (i = 0; i < ndoms; i++)
 +              free_cpumask_var(doms[i]);
 +      kfree(doms);
 +}
 +
  /*
   * Set up scheduler domains and groups. Callers must hold the hotplug lock.
   * For now this just excludes isolated cpus, but could be used to
@@@ -8996,12 -8880,12 +8996,12 @@@ static int arch_init_sched_domains(cons
  
        arch_update_cpu_topology();
        ndoms_cur = 1;
 -      doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
 +      doms_cur = alloc_sched_domains(ndoms_cur);
        if (!doms_cur)
 -              doms_cur = fallback_doms;
 -      cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
 +              doms_cur = &fallback_doms;
 +      cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
        dattr_cur = NULL;
 -      err = build_sched_domains(doms_cur);
 +      err = build_sched_domains(doms_cur[0]);
        register_sched_domain_sysctl();
  
        return err;
@@@ -9051,19 -8935,19 +9051,19 @@@ static int dattrs_equal(struct sched_do
   * doms_new[] to the current sched domain partitioning, doms_cur[].
   * It destroys each deleted domain and builds each new domain.
   *
 - * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
 + * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
   * The masks don't intersect (don't overlap.) We should setup one
   * sched domain for each mask. CPUs not in any of the cpumasks will
   * not be load balanced. If the same cpumask appears both in the
   * current 'doms_cur' domains and in the new 'doms_new', we can leave
   * it as it is.
   *
 - * The passed in 'doms_new' should be kmalloc'd. This routine takes
 - * ownership of it and will kfree it when done with it. If the caller
 - * failed the kmalloc call, then it can pass in doms_new == NULL &&
 - * ndoms_new == 1, and partition_sched_domains() will fallback to
 - * the single partition 'fallback_doms', it also forces the domains
 - * to be rebuilt.
 + * The passed in 'doms_new' should be allocated using
 + * alloc_sched_domains.  This routine takes ownership of it and will
 + * free_sched_domains it when done with it. If the caller failed the
 + * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
 + * and partition_sched_domains() will fallback to the single partition
 + * 'fallback_doms', it also forces the domains to be rebuilt.
   *
   * If doms_new == NULL it will be replaced with cpu_online_mask.
   * ndoms_new == 0 is a special case for destroying existing domains,
   *
   * Call with hotplug lock held
   */
 -/* FIXME: Change to struct cpumask *doms_new[] */
 -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
                             struct sched_domain_attr *dattr_new)
  {
        int i, j, n;
        /* Destroy deleted domains */
        for (i = 0; i < ndoms_cur; i++) {
                for (j = 0; j < n && !new_topology; j++) {
 -                      if (cpumask_equal(&doms_cur[i], &doms_new[j])
 +                      if (cpumask_equal(doms_cur[i], doms_new[j])
                            && dattrs_equal(dattr_cur, i, dattr_new, j))
                                goto match1;
                }
                /* no match - a current sched domain not in new doms_new[] */
 -              detach_destroy_domains(doms_cur + i);
 +              detach_destroy_domains(doms_cur[i]);
  match1:
                ;
        }
  
        if (doms_new == NULL) {
                ndoms_cur = 0;
 -              doms_new = fallback_doms;
 -              cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
 +              doms_new = &fallback_doms;
 +              cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
                WARN_ON_ONCE(dattr_new);
        }
  
        /* Build new domains */
        for (i = 0; i < ndoms_new; i++) {
                for (j = 0; j < ndoms_cur && !new_topology; j++) {
 -                      if (cpumask_equal(&doms_new[i], &doms_cur[j])
 +                      if (cpumask_equal(doms_new[i], doms_cur[j])
                            && dattrs_equal(dattr_new, i, dattr_cur, j))
                                goto match2;
                }
                /* no match - add a new doms_new */
 -              __build_sched_domains(doms_new + i,
 +              __build_sched_domains(doms_new[i],
                                        dattr_new ? dattr_new + i : NULL);
  match2:
                ;
        }
  
        /* Remember the new sched domains */
 -      if (doms_cur != fallback_doms)
 -              kfree(doms_cur);
 +      if (doms_cur != &fallback_doms)
 +              free_sched_domains(doms_cur, ndoms_cur);
        kfree(dattr_cur);       /* kfree(NULL) is safe */
        doms_cur = doms_new;
        dattr_cur = dattr_new;
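
With doms_cur/doms_new now arrays of cpumask_var_t, partition_sched_domains() takes ownership of memory obtained from the new alloc_sched_domains() helper and releases it with free_sched_domains() on the next repartition, as the updated comment block spells out. A hedged sketch of a caller, using only the interfaces visible in these hunks (error handling kept minimal, function name invented):

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/sched.h>

/* Illustrative only: build two exclusive domains and hand them off. */
int rebuild_two_domains(const struct cpumask *a, const struct cpumask *b)
{
        cpumask_var_t *doms;

        doms = alloc_sched_domains(2);
        if (!doms)
                return -ENOMEM;

        cpumask_copy(doms[0], a);
        cpumask_copy(doms[1], b);

        /*
         * Callers hold the hotplug lock (see the comment above).
         * partition_sched_domains() now owns 'doms' and will
         * free_sched_domains() it when the partitioning changes
         * again, so it must not be freed here.
         */
        partition_sched_domains(2, doms, NULL);
        return 0;
}
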
@@@ -9234,10 -9119,8 +9234,10 @@@ static int update_sched_domains(struct 
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
 -      case CPU_DEAD:
 -      case CPU_DEAD_FROZEN:
 +      case CPU_DOWN_PREPARE:
 +      case CPU_DOWN_PREPARE_FROZEN:
 +      case CPU_DOWN_FAILED:
 +      case CPU_DOWN_FAILED_FROZEN:
                partition_sched_domains(1, NULL, NULL);
                return NOTIFY_OK;
  
@@@ -9284,7 -9167,7 +9284,7 @@@ void __init sched_init_smp(void
  #endif
        get_online_cpus();
        mutex_lock(&sched_domains_mutex);
 -      arch_init_sched_domains(cpu_online_mask);
 +      arch_init_sched_domains(cpu_active_mask);
        cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
        if (cpumask_empty(non_isolated_cpus))
                cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@@ -9447,6 -9330,10 +9447,6 @@@ void __init sched_init(void
  #ifdef CONFIG_CPUMASK_OFFSTACK
        alloc_size += num_possible_cpus() * cpumask_size();
  #endif
 -      /*
 -       * As sched_init() is called before page_alloc is setup,
 -       * we use alloc_bootmem().
 -       */
        if (alloc_size) {
                ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
  
  #elif defined CONFIG_USER_SCHED
                init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
                init_tg_rt_entry(&init_task_group,
-                               &per_cpu(init_rt_rq, i),
+                               &per_cpu(init_rt_rq_var, i),
                                &per_cpu(init_sched_rt_entity, i), i, 1,
                                root_task_group.rt_se[i]);
  #endif
                rq->cpu = i;
                rq->online = 0;
                rq->migration_thread = NULL;
 +              rq->idle_stamp = 0;
 +              rq->avg_idle = 2*sysctl_sched_migration_cost;
                INIT_LIST_HEAD(&rq->migration_queue);
                rq_attach_root(rq, &def_root_domain);
  #endif
        current->sched_class = &fair_sched_class;
  
        /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
 -      alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 +      zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
  #ifdef CONFIG_SMP
  #ifdef CONFIG_NO_HZ
 -      alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
 +      zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
        alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
  #endif
 -      alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 +      /* May be allocated at isolcpus cmdline parse time */
 +      if (cpu_isolated_map == NULL)
 +              zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
  #endif /* SMP */
  
        perf_event_init();
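
Two changes in this file cooperate around cpu_isolated_map: isolated_cpu_setup() now allocates it with alloc_bootmem_cpumask_var() when "isolcpus=" is parsed (which happens before sched_init() runs), and sched_init() only zalloc's it when no such parameter was given. The same allocate-at-whichever-point-runs-first guard, sketched with hypothetical names; the registration macro is not visible in the hunk, so __setup is assumed here, and the NULL check mirrors the one in the hunk (meaningful with CONFIG_CPUMASK_OFFSTACK, harmless otherwise):

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/slab.h>

static cpumask_var_t mydrv_cpu_mask;    /* hypothetical */

static int __init mydrv_cpus_setup(char *str)
{
        /* Command-line parsing runs before the normal allocators are up. */
        alloc_bootmem_cpumask_var(&mydrv_cpu_mask);
        cpulist_parse(str, mydrv_cpu_mask);
        return 1;
}
__setup("mydrv_cpus=", mydrv_cpus_setup);

static void __init mydrv_init(void)
{
        /* May already have been allocated at cmdline parse time. */
        if (mydrv_cpu_mask == NULL)
                zalloc_cpumask_var(&mydrv_cpu_mask, GFP_NOWAIT);
}
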
@@@ -9848,15 -9731,13 +9848,15 @@@ int alloc_fair_sched_group(struct task_
                se = kzalloc_node(sizeof(struct sched_entity),
                                  GFP_KERNEL, cpu_to_node(i));
                if (!se)
 -                      goto err;
 +                      goto err_free_rq;
  
                init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
        }
  
        return 1;
  
 + err_free_rq:
 +      kfree(cfs_rq);
   err:
        return 0;
  }
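
The alloc_fair_sched_group() hunk above (and the matching alloc_rt_sched_group() change below) replaces a bare goto err with an err_free_rq label, so the cfs_rq/rt_rq allocated earlier in the same loop iteration is freed when the second allocation fails instead of being leaked. The same unwind-by-label shape in self-contained C, with invented names:

#include <stdlib.h>

struct pair { void *rq; void *se; };

/* Returns 0 on success, -1 on failure; never leaks a half-built pair. */
int pair_alloc(struct pair *p, size_t rq_size, size_t se_size)
{
        p->rq = calloc(1, rq_size);
        if (!p->rq)
                goto err;

        p->se = calloc(1, se_size);
        if (!p->se)
                goto err_free_rq;       /* undo only what already succeeded */

        return 0;

err_free_rq:
        free(p->rq);
        p->rq = NULL;
err:
        return -1;
}
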
@@@ -9938,15 -9819,13 +9938,15 @@@ int alloc_rt_sched_group(struct task_gr
                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
                                     GFP_KERNEL, cpu_to_node(i));
                if (!rt_se)
 -                      goto err;
 +                      goto err_free_rq;
  
                init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
        }
  
        return 1;
  
 + err_free_rq:
 +      kfree(rt_rq);
   err:
        return 0;
  }
@@@ -10988,7 -10867,6 +10988,7 @@@ void synchronize_sched_expedited(void
                spin_unlock_irqrestore(&rq->lock, flags);
        }
        rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
 +      synchronize_sched_expedited_count++;
        mutex_unlock(&rcu_sched_expedited_mutex);
        put_online_cpus();
        if (need_full_sync)
diff --combined kernel/softirq.c
index 21939d9e830e22b040f15e5562c97b0a8b8a9e47,0740dfd55c51f8b6ffde0ae0c870eb9b51b36373..a09502e2ef758721917ab537e345da0036197628
@@@ -302,9 -302,9 +302,9 @@@ void irq_exit(void
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
  
 +      rcu_irq_exit();
  #ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
 -      rcu_irq_exit();
        if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
                tick_nohz_stop_sched_tick(0);
  #endif
@@@ -697,7 -697,7 +697,7 @@@ void __init softirq_init(void
        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  }
  
- static int ksoftirqd(void * __bind_cpu)
+ static int run_ksoftirqd(void * __bind_cpu)
  {
        set_current_state(TASK_INTERRUPTIBLE);
  
@@@ -810,7 -810,7 +810,7 @@@ static int __cpuinit cpu_callback(struc
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+               p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
diff --combined kernel/trace/trace.c
index 88bd9ae2a9ed9b71b881c665f7cdb4eb722cf309,85a5ed70b5b237326732593248e98066daf2c9d9..c82dfd92fdfd8d663b6146ea6a584d42ea715508
@@@ -86,17 -86,17 +86,17 @@@ static int dummy_set_flag(u32 old_flags
   */
  static int tracing_disabled = 1;
  
- DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
+ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
  
  static inline void ftrace_disable_cpu(void)
  {
        preempt_disable();
-       local_inc(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
  }
  
  static inline void ftrace_enable_cpu(void)
  {
-       local_dec(&__get_cpu_var(ftrace_cpu_disabled));
+       __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
        preempt_enable();
  }
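
ftrace_cpu_disabled changes above from a local_t touched through __get_cpu_var()/local_inc() into a plain per-cpu int bumped with __this_cpu_inc()/__this_cpu_dec(), one of the this_cpu conversions in this merge. A hedged sketch of the same pattern under a hypothetical name, assuming a kernel build environment; per_cpu_var() is the wrapper this tree uses at this point in time:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(int, mydrv_disabled);     /* hypothetical counter */

static inline void mydrv_disable(void)
{
        preempt_disable();
        __this_cpu_inc(per_cpu_var(mydrv_disabled));
}

static inline void mydrv_enable(void)
{
        __this_cpu_dec(per_cpu_var(mydrv_disabled));
        preempt_enable();
}
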
  
@@@ -129,7 -129,7 +129,7 @@@ static int tracing_set_tracer(const cha
  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
  static char *default_bootup_tracer;
  
 -static int __init set_ftrace(char *str)
 +static int __init set_cmdline_ftrace(char *str)
  {
        strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
        ring_buffer_expanded = 1;
        return 1;
  }
 -__setup("ftrace=", set_ftrace);
 +__setup("ftrace=", set_cmdline_ftrace);
  
  static int __init set_ftrace_dump_on_oops(char *str)
  {
@@@ -203,7 -203,7 +203,7 @@@ cycle_t ftrace_now(int cpu
   */
  static struct trace_array     max_tr;
  
- static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+ static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
  
  /* tracer_enabled is used to toggle activation of a tracer */
  static int                    tracer_enabled = 1;
@@@ -1085,7 -1085,7 +1085,7 @@@ trace_function(struct trace_array *tr
        struct ftrace_entry *entry;
  
        /* If we are reading the ring buffer, don't trace */
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                return;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@@ -1363,6 -1363,9 +1363,6 @@@ int trace_array_vprintk(struct trace_ar
        __raw_spin_lock(&trace_buf_lock);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
  
 -      len = min(len, TRACE_BUF_SIZE-1);
 -      trace_buf[len] = 0;
 -
        size = sizeof(*entry) + len + 1;
        buffer = tr->buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
 -      entry->ip                       = ip;
 +      entry->ip = ip;
  
        memcpy(&entry->buf, trace_buf, len);
 -      entry->buf[len] = 0;
 +      entry->buf[len] = '\0';
        if (!filter_check_discard(call, entry, buffer, event))
                ring_buffer_unlock_commit(buffer, event);
  
  
  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
  {
 -      return trace_array_printk(&global_trace, ip, fmt, args);
 +      return trace_array_vprintk(&global_trace, ip, fmt, args);
  }
  EXPORT_SYMBOL_GPL(trace_vprintk);
  
@@@ -1512,8 -1515,6 +1512,8 @@@ static void *s_next(struct seq_file *m
        int i = (int)*pos;
        void *ent;
  
 +      WARN_ON_ONCE(iter->leftover);
 +
        (*pos)++;
  
        /* can't go backwards */
@@@ -1612,16 -1613,8 +1612,16 @@@ static void *s_start(struct seq_file *m
                        ;
  
        } else {
 -              l = *pos - 1;
 -              p = s_next(m, p, &l);
 +              /*
 +               * If we overflowed the seq_file before, then we want
 +               * to just reuse the trace_seq buffer again.
 +               */
 +              if (iter->leftover)
 +                      p = iter;
 +              else {
 +                      l = *pos - 1;
 +                      p = s_next(m, p, &l);
 +              }
        }
  
        trace_event_read_lock();
@@@ -1929,7 -1922,6 +1929,7 @@@ static enum print_line_t print_trace_li
  static int s_show(struct seq_file *m, void *v)
  {
        struct trace_iterator *iter = v;
 +      int ret;
  
        if (iter->ent == NULL) {
                if (iter->tr) {
                        if (!(trace_flags & TRACE_ITER_VERBOSE))
                                print_func_help_header(m);
                }
 +      } else if (iter->leftover) {
 +              /*
 +               * If we filled the seq_file buffer earlier, we
 +               * want to just show it now.
 +               */
 +              ret = trace_print_seq(m, &iter->seq);
 +
 +              /* ret should this time be zero, but you never know */
 +              iter->leftover = ret;
 +
        } else {
                print_trace_line(iter);
 -              trace_print_seq(m, &iter->seq);
 +              ret = trace_print_seq(m, &iter->seq);
 +              /*
 +               * If we overflow the seq_file buffer, then it will
 +               * ask us for this data again at start up.
 +               * Use that instead.
 +               *  ret is 0 if seq_file write succeeded.
 +               *        -1 otherwise.
 +               */
 +              iter->leftover = ret;
        }
  
        return 0;
@@@ -2466,7 -2440,7 +2466,7 @@@ tracing_trace_options_write(struct fil
                        return ret;
        }
  
 -      filp->f_pos += cnt;
 +      *ppos += cnt;
  
        return cnt;
  }
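
Several write handlers in this file stop bumping filp->f_pos directly and instead advance the *ppos the VFS handed in; for pwrite()-style callers the two are not the same storage, so updating *ppos is what keeps the position bookkeeping consistent. The shape of such a handler, as a minimal hedged sketch (the real buffer handling is elided):

#include <linux/fs.h>
#include <linux/types.h>

static ssize_t mydrv_write(struct file *filp, const char __user *ubuf,
                           size_t cnt, loff_t *ppos)
{
        /* ... copy and consume cnt bytes from ubuf here ... */

        *ppos += cnt;           /* advance the offset we were passed */
        return cnt;
}
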
@@@ -2608,7 -2582,7 +2608,7 @@@ tracing_ctrl_write(struct file *filp, c
        }
        mutex_unlock(&trace_types_lock);
  
 -      filp->f_pos += cnt;
 +      *ppos += cnt;
  
        return cnt;
  }
@@@ -2790,7 -2764,7 +2790,7 @@@ tracing_set_trace_write(struct file *fi
        if (err)
                return err;
  
 -      filp->f_pos += ret;
 +      *ppos += ret;
  
        return ret;
  }
@@@ -2923,10 -2897,6 +2923,10 @@@ static int tracing_release_pipe(struct 
        else
                cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
  
 +
 +      if (iter->trace->pipe_close)
 +              iter->trace->pipe_close(iter);
 +
        mutex_unlock(&trace_types_lock);
  
        free_cpumask_var(iter->started);
@@@ -3329,7 -3299,7 +3329,7 @@@ tracing_entries_write(struct file *filp
                }
        }
  
 -      filp->f_pos += cnt;
 +      *ppos += cnt;
  
        /* If check pages failed, return ENOMEM */
        if (tracing_disabled)
@@@ -3364,6 -3334,7 +3364,6 @@@ tracing_mark_write(struct file *filp, c
                                        size_t cnt, loff_t *fpos)
  {
        char *buf;
 -      char *end;
  
        if (tracing_disabled)
                return -EINVAL;
        if (cnt > TRACE_BUF_SIZE)
                cnt = TRACE_BUF_SIZE;
  
 -      buf = kmalloc(cnt + 1, GFP_KERNEL);
 +      buf = kmalloc(cnt + 2, GFP_KERNEL);
        if (buf == NULL)
                return -ENOMEM;
  
                kfree(buf);
                return -EFAULT;
        }
 +      if (buf[cnt-1] != '\n') {
 +              buf[cnt] = '\n';
 +              buf[cnt+1] = '\0';
 +      } else
 +              buf[cnt] = '\0';
  
 -      /* Cut from the first nil or newline. */
 -      buf[cnt] = '\0';
 -      end = strchr(buf, '\n');
 -      if (end)
 -              *end = '\0';
 -
 -      cnt = mark_printk("%s\n", buf);
 +      cnt = mark_printk("%s", buf);
        kfree(buf);
        *fpos += cnt;
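
tracing_mark_write() above now allocates cnt + 2 bytes and appends the trailing '\n' itself when the user's buffer does not already end in one, rather than cutting the string at the first newline and letting mark_printk() add it. The same ensure-a-trailing-newline step in self-contained C, standing in for the copy_from_user() path with invented names:

#include <stdlib.h>
#include <string.h>

/* Copy cnt bytes and guarantee the result is newline-terminated. */
char *copy_with_newline(const char *src, size_t cnt)
{
        char *buf = malloc(cnt + 2);    /* room for '\n' and '\0' */
        if (!buf)
                return NULL;

        memcpy(buf, src, cnt);
        if (cnt == 0 || buf[cnt - 1] != '\n') {
                buf[cnt] = '\n';
                buf[cnt + 1] = '\0';
        } else {
                buf[cnt] = '\0';
        }
        return buf;
}
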
  
@@@ -3758,7 -3730,7 +3758,7 @@@ tracing_stats_read(struct file *filp, c
  
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
 -              return ENOMEM;
 +              return -ENOMEM;
  
        trace_seq_init(s);
  
@@@ -4454,7 -4426,7 +4454,7 @@@ __init static int tracer_alloc_buffers(
        /* Allocate the first page for all buffers */
        for_each_tracing_cpu(i) {
                global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_data, i);
+               max_tr.data[i] = &per_cpu(max_tr_data, i);
        }
  
        trace_init_cmdlines();
diff --combined kernel/trace/trace.h
index 7fa33cab69629299a9d4470fbf13db81d20e3c94,542f45554883bcf3a1a75f79e2804b6c07e7d8ab..a52bed2eedd848ec5e7c249d479baa8d14a98ad6
@@@ -11,7 -11,6 +11,7 @@@
  #include <linux/ftrace.h>
  #include <trace/boot.h>
  #include <linux/kmemtrace.h>
 +#include <linux/hw_breakpoint.h>
  
  #include <linux/trace_seq.h>
  #include <linux/ftrace_event.h>
@@@ -38,7 -37,6 +38,7 @@@ enum trace_type 
        TRACE_KMEM_ALLOC,
        TRACE_KMEM_FREE,
        TRACE_BLK,
 +      TRACE_KSYM,
  
        __TRACE_LAST_TYPE,
  };
@@@ -100,32 -98,9 +100,32 @@@ struct syscall_trace_enter 
  struct syscall_trace_exit {
        struct trace_entry      ent;
        int                     nr;
 -      unsigned long           ret;
 +      long                    ret;
  };
  
 +struct kprobe_trace_entry {
 +      struct trace_entry      ent;
 +      unsigned long           ip;
 +      int                     nargs;
 +      unsigned long           args[];
 +};
 +
 +#define SIZEOF_KPROBE_TRACE_ENTRY(n)                  \
 +      (offsetof(struct kprobe_trace_entry, args) +    \
 +      (sizeof(unsigned long) * (n)))
 +
 +struct kretprobe_trace_entry {
 +      struct trace_entry      ent;
 +      unsigned long           func;
 +      unsigned long           ret_ip;
 +      int                     nargs;
 +      unsigned long           args[];
 +};
 +
 +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)                       \
 +      (offsetof(struct kretprobe_trace_entry, args) + \
 +      (sizeof(unsigned long) * (n)))
 +
  /*
   * trace_flag_type is an enumeration that holds different
   * states when a trace occurs. These are:
@@@ -234,7 -209,6 +234,7 @@@ extern void __ftrace_bad_type(void)
                          TRACE_KMEM_ALLOC);    \
                IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
                          TRACE_KMEM_FREE);     \
 +              IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
                __ftrace_bad_type();                                    \
        } while (0)
  
@@@ -272,7 -246,6 +272,7 @@@ struct tracer_flags 
   * @pipe_open: called when the trace_pipe file is opened
   * @wait_pipe: override how the user waits for traces on trace_pipe
   * @close: called when the trace file is released
 + * @pipe_close: called when the trace_pipe file is released
   * @read: override the default read callback on trace_pipe
   * @splice_read: override the default splice_read callback on trace_pipe
   * @selftest: selftest to run on boot (see trace_selftest.c)
@@@ -291,7 -264,6 +291,7 @@@ struct tracer 
        void                    (*pipe_open)(struct trace_iterator *iter);
        void                    (*wait_pipe)(struct trace_iterator *iter);
        void                    (*close)(struct trace_iterator *iter);
 +      void                    (*pipe_close)(struct trace_iterator *iter);
        ssize_t                 (*read)(struct trace_iterator *iter,
                                        struct file *filp, char __user *ubuf,
                                        size_t cnt, loff_t *ppos);
@@@ -392,8 -364,6 +392,8 @@@ int register_tracer(struct tracer *type
  void unregister_tracer(struct tracer *type);
  int is_tracing_stopped(void);
  
 +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
 +
  extern unsigned long nsecs_to_usecs(unsigned long nsecs);
  
  #ifdef CONFIG_TRACER_MAX_TRACE
@@@ -443,7 -413,7 +443,7 @@@ extern int DYN_FTRACE_TEST_NAME(void)
  
  extern int ring_buffer_expanded;
  extern bool tracing_selftest_disabled;
- DECLARE_PER_CPU(local_t, ftrace_cpu_disabled);
+ DECLARE_PER_CPU(int, ftrace_cpu_disabled);
  
  #ifdef CONFIG_FTRACE_STARTUP_TEST
  extern int trace_selftest_startup_function(struct tracer *trace,
@@@ -468,8 -438,6 +468,8 @@@ extern int trace_selftest_startup_branc
                                         struct trace_array *tr);
  extern int trace_selftest_startup_hw_branches(struct tracer *trace,
                                              struct trace_array *tr);
 +extern int trace_selftest_startup_ksym(struct tracer *trace,
 +                                       struct trace_array *tr);
  #endif /* CONFIG_FTRACE_STARTUP_TEST */
  
  extern void *head_page(struct trace_array_cpu *data);
@@@ -515,6 -483,10 +515,6 @@@ static inline int ftrace_graph_addr(uns
        return 0;
  }
  #else
 -static inline int ftrace_trace_addr(unsigned long addr)
 -{
 -      return 1;
 -}
  static inline int ftrace_graph_addr(unsigned long addr)
  {
        return 1;
@@@ -528,12 -500,12 +528,12 @@@ print_graph_function(struct trace_itera
  }
  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
  
 -extern struct pid *ftrace_pid_trace;
 +extern struct list_head ftrace_pids;
  
  #ifdef CONFIG_FUNCTION_TRACER
  static inline int ftrace_trace_task(struct task_struct *task)
  {
 -      if (!ftrace_pid_trace)
 +      if (list_empty(&ftrace_pids))
                return 1;
  
        return test_tsk_trace_trace(task);
@@@ -715,6 -687,7 +715,6 @@@ struct event_filter 
        int                     n_preds;
        struct filter_pred      **preds;
        char                    *filter_string;
 -      bool                    no_reset;
  };
  
  struct event_subsystem {
  };
  
  struct filter_pred;
 +struct regex;
  
  typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
                                 int val1, int val2);
  
 +typedef int (*regex_match_func)(char *str, struct regex *r, int len);
 +
 +enum regex_type {
 +      MATCH_FULL = 0,
 +      MATCH_FRONT_ONLY,
 +      MATCH_MIDDLE_ONLY,
 +      MATCH_END_ONLY,
 +};
 +
 +struct regex {
 +      char                    pattern[MAX_FILTER_STR_VAL];
 +      int                     len;
 +      int                     field_len;
 +      regex_match_func        match;
 +};
 +
  struct filter_pred {
 -      filter_pred_fn_t fn;
 -      u64 val;
 -      char str_val[MAX_FILTER_STR_VAL];
 -      int str_len;
 -      char *field_name;
 -      int offset;
 -      int not;
 -      int op;
 -      int pop_n;
 +      filter_pred_fn_t        fn;
 +      u64                     val;
 +      struct regex            regex;
 +      char                    *field_name;
 +      int                     offset;
 +      int                     not;
 +      int                     op;
 +      int                     pop_n;
  };
  
 +extern enum regex_type
 +filter_parse_regex(char *buff, int len, char **search, int *not);
  extern void print_event_filter(struct ftrace_event_call *call,
                               struct trace_seq *s);
  extern int apply_event_filter(struct ftrace_event_call *call,
@@@ -775,8 -730,7 +775,8 @@@ filter_check_discard(struct ftrace_even
                     struct ring_buffer *buffer,
                     struct ring_buffer_event *event)
  {
 -      if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
 +      if (unlikely(call->filter_active) &&
 +          !filter_match_preds(call->filter, rec)) {
                ring_buffer_discard_commit(buffer, event);
                return 1;
        }
diff --combined kernel/trace/trace_functions_graph.c
index a43d009c561a28bcbf1078626e086c63bf475d5e,90a6daa10962432e9bb40ff439a26d3add840fe9..b1342c5d37cfb821cfb96fcfd610cb95cdfb1082
  #include "trace.h"
  #include "trace_output.h"
  
 -struct fgraph_data {
 +struct fgraph_cpu_data {
        pid_t           last_pid;
        int             depth;
 +      int             ignore;
 +};
 +
 +struct fgraph_data {
 +      struct fgraph_cpu_data          *cpu_data;
 +
 +      /* Place to preserve last processed entry. */
 +      struct ftrace_graph_ent_entry   ent;
 +      struct ftrace_graph_ret_entry   ret;
 +      int                             failed;
 +      int                             cpu;
  };
  
  #define TRACE_GRAPH_INDENT    2
@@@ -187,7 -176,7 +187,7 @@@ static int __trace_graph_entry(struct t
        struct ring_buffer *buffer = tr->buffer;
        struct ftrace_graph_ent_entry *entry;
  
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                return 0;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@@ -251,7 -240,7 +251,7 @@@ static void __trace_graph_return(struc
        struct ring_buffer *buffer = tr->buffer;
        struct ftrace_graph_ret_entry *entry;
  
-       if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+       if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
                return;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@@ -395,7 -384,7 +395,7 @@@ verif_pid(struct trace_seq *s, pid_t pi
        if (!data)
                return TRACE_TYPE_HANDLED;
  
 -      last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
 +      last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
  
        if (*last_pid == pid)
                return TRACE_TYPE_HANDLED;
@@@ -446,49 -435,26 +446,49 @@@ static struct ftrace_graph_ret_entry 
  get_return_for_leaf(struct trace_iterator *iter,
                struct ftrace_graph_ent_entry *curr)
  {
 -      struct ring_buffer_iter *ring_iter;
 +      struct fgraph_data *data = iter->private;
 +      struct ring_buffer_iter *ring_iter = NULL;
        struct ring_buffer_event *event;
        struct ftrace_graph_ret_entry *next;
  
 -      ring_iter = iter->buffer_iter[iter->cpu];
 +      /*
 +       * If the previous output failed to write to the seq buffer,
 +       * then we just reuse the data from before.
 +       */
 +      if (data && data->failed) {
 +              curr = &data->ent;
 +              next = &data->ret;
 +      } else {
  
 -      /* First peek to compare current entry and the next one */
 -      if (ring_iter)
 -              event = ring_buffer_iter_peek(ring_iter, NULL);
 -      else {
 -      /* We need to consume the current entry to see the next one */
 -              ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
 -              event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
 -                                      NULL);
 -      }
 +              ring_iter = iter->buffer_iter[iter->cpu];
 +
 +              /* First peek to compare current entry and the next one */
 +              if (ring_iter)
 +                      event = ring_buffer_iter_peek(ring_iter, NULL);
 +              else {
 +                      /*
 +                       * We need to consume the current entry to see
 +                       * the next one.
 +                       */
 +                      ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
 +                      event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
 +                                               NULL);
 +              }
  
 -      if (!event)
 -              return NULL;
 +              if (!event)
 +                      return NULL;
 +
 +              next = ring_buffer_event_data(event);
  
 -      next = ring_buffer_event_data(event);
 +              if (data) {
 +                      /*
 +                       * Save current and next entries for later reference
 +                       * if the output fails.
 +                       */
 +                      data->ent = *curr;
 +                      data->ret = *next;
 +              }
 +      }
  
        if (next->ent.type != TRACE_GRAPH_RET)
                return NULL;
@@@ -674,7 -640,7 +674,7 @@@ print_graph_entry_leaf(struct trace_ite
  
        if (data) {
                int cpu = iter->cpu;
 -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
 +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                /*
                 * Comments display at + 1 to depth. Since
@@@ -722,7 -688,7 +722,7 @@@ print_graph_entry_nested(struct trace_i
  
        if (data) {
                int cpu = iter->cpu;
 -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
 +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                *depth = call->depth;
        }
@@@ -816,34 -782,19 +816,34 @@@ static enum print_line_
  print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
                        struct trace_iterator *iter)
  {
 -      int cpu = iter->cpu;
 +      struct fgraph_data *data = iter->private;
        struct ftrace_graph_ent *call = &field->graph_ent;
        struct ftrace_graph_ret_entry *leaf_ret;
 +      static enum print_line_t ret;
 +      int cpu = iter->cpu;
  
        if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
                return TRACE_TYPE_PARTIAL_LINE;
  
        leaf_ret = get_return_for_leaf(iter, field);
        if (leaf_ret)
 -              return print_graph_entry_leaf(iter, field, leaf_ret, s);
 +              ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
        else
 -              return print_graph_entry_nested(iter, field, s, cpu);
 +              ret = print_graph_entry_nested(iter, field, s, cpu);
  
 +      if (data) {
 +              /*
 +               * If we failed to write our output, then we need to make
 +               * note of it. Because we already consumed our entry.
 +               */
 +              if (s->full) {
 +                      data->failed = 1;
 +                      data->cpu = cpu;
 +              } else
 +                      data->failed = 0;
 +      }
 +
 +      return ret;
  }
  
  static enum print_line_t
@@@ -859,7 -810,7 +859,7 @@@ print_graph_return(struct ftrace_graph_
  
        if (data) {
                int cpu = iter->cpu;
 -              int *depth = &(per_cpu_ptr(data, cpu)->depth);
 +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                /*
                 * Comments display at + 1 to depth. This is the
@@@ -922,7 -873,7 +922,7 @@@ print_graph_comment(struct trace_seq *s
        int i;
  
        if (data)
 -              depth = per_cpu_ptr(data, iter->cpu)->depth;
 +              depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
  
        if (print_graph_prologue(iter, s, 0, 0))
                return TRACE_TYPE_PARTIAL_LINE;
  enum print_line_t
  print_graph_function(struct trace_iterator *iter)
  {
 +      struct ftrace_graph_ent_entry *field;
 +      struct fgraph_data *data = iter->private;
        struct trace_entry *entry = iter->ent;
        struct trace_seq *s = &iter->seq;
 +      int cpu = iter->cpu;
 +      int ret;
 +
 +      if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
 +              per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
 +              return TRACE_TYPE_HANDLED;
 +      }
 +
 +      /*
 +       * If the last output failed, there's a possibility we need
 +       * to print out the missing entry which would never go out.
 +       */
 +      if (data && data->failed) {
 +              field = &data->ent;
 +              iter->cpu = data->cpu;
 +              ret = print_graph_entry(field, s, iter);
 +              if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
 +                      per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
 +                      ret = TRACE_TYPE_NO_CONSUME;
 +              }
 +              iter->cpu = cpu;
 +              return ret;
 +      }
  
        switch (entry->type) {
        case TRACE_GRAPH_ENT: {
                 * sizeof(struct ftrace_graph_ent_entry) is very small,
                 * it can be safely saved at the stack.
                 */
 -              struct ftrace_graph_ent_entry *field, saved;
 +              struct ftrace_graph_ent_entry saved;
                trace_assign_type(field, entry);
                saved = *field;
                return print_graph_entry(&saved, s, iter);
@@@ -1104,54 -1030,31 +1104,54 @@@ static void print_graph_headers(struct 
  static void graph_trace_open(struct trace_iterator *iter)
  {
        /* pid and depth on the last trace processed */
 -      struct fgraph_data *data = alloc_percpu(struct fgraph_data);
 +      struct fgraph_data *data;
        int cpu;
  
 +      iter->private = NULL;
 +
 +      data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
 -              pr_warning("function graph tracer: not enough memory\n");
 -      else
 -              for_each_possible_cpu(cpu) {
 -                      pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
 -                      int *depth = &(per_cpu_ptr(data, cpu)->depth);
 -                      *pid = -1;
 -                      *depth = 0;
 -              }
 +              goto out_err;
 +
 +      data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
 +      if (!data->cpu_data)
 +              goto out_err_free;
 +
 +      for_each_possible_cpu(cpu) {
 +              pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
 +              int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 +              int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
 +              *pid = -1;
 +              *depth = 0;
 +              *ignore = 0;
 +      }
  
        iter->private = data;
 +
 +      return;
 +
 + out_err_free:
 +      kfree(data);
 + out_err:
 +      pr_warning("function graph tracer: not enough memory\n");
  }
  
  static void graph_trace_close(struct trace_iterator *iter)
  {
 -      free_percpu(iter->private);
 +      struct fgraph_data *data = iter->private;
 +
 +      if (data) {
 +              free_percpu(data->cpu_data);
 +              kfree(data);
 +      }
  }
  
  static struct tracer graph_trace __read_mostly = {
        .name           = "function_graph",
        .open           = graph_trace_open,
 +      .pipe_open      = graph_trace_open,
        .close          = graph_trace_close,
 +      .pipe_close     = graph_trace_close,
        .wait_pipe      = poll_wait_pipe,
        .init           = graph_trace_init,
        .reset          = graph_trace_reset,
diff --combined kernel/trace/trace_hw_branches.c
index 69543a905cd5f1c92086cb47576a21028db11a37,adaf7a39d0dcbaacad64d26bfbaab671b8d74fe8..7b97000745f5cc38601aff4101b58d9d8bb99b6c
  
  #define BTS_BUFFER_SIZE (1 << 13)
  
- static DEFINE_PER_CPU(struct bts_tracer *, tracer);
- static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
+ static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
+ static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
  
- #define this_tracer per_cpu(tracer, smp_processor_id())
+ #define this_tracer per_cpu(hwb_tracer, smp_processor_id())
  
  static int trace_hw_branches_enabled __read_mostly;
  static int trace_hw_branches_suspended __read_mostly;
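
The renames in this file (tracer becomes hwb_tracer, buffer becomes hwb_buffer) follow the same rule as sched_groups, max_tr_data, init_rt_rq_var, slab_reap_work and the ksoftirqd to run_ksoftirqd change earlier: per-cpu symbols are being prepared to share the ordinary global namespace, so generic names get a subsystem prefix to stay unique. A hedged sketch of the resulting convention, with hypothetical identifiers:

#include <linux/percpu.h>

/*
 * Prefix per-cpu state with the subsystem name so it cannot collide
 * with an unrelated global simply called "state" or "buffer".
 */
static DEFINE_PER_CPU(struct mydrv_state *, mydrv_state);
static DEFINE_PER_CPU(unsigned char[64], mydrv_buffer);

static void mydrv_reset(int cpu)
{
        per_cpu(mydrv_state, cpu) = NULL;       /* per-CPU slot for 'cpu' */
        per_cpu(mydrv_buffer, cpu)[0] = 0;
}
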
@@@ -32,12 -32,13 +32,13 @@@ static struct trace_array *hw_branch_tr
  
  static void bts_trace_init_cpu(int cpu)
  {
-       per_cpu(tracer, cpu) =
-               ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
-                                  NULL, (size_t)-1, BTS_KERNEL);
+       per_cpu(hwb_tracer, cpu) =
+               ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
+                                  BTS_BUFFER_SIZE, NULL, (size_t)-1,
+                                  BTS_KERNEL);
  
-       if (IS_ERR(per_cpu(tracer, cpu)))
-               per_cpu(tracer, cpu) = NULL;
+       if (IS_ERR(per_cpu(hwb_tracer, cpu)))
+               per_cpu(hwb_tracer, cpu) = NULL;
  }
  
  static int bts_trace_init(struct trace_array *tr)
@@@ -51,7 -52,7 +52,7 @@@
        for_each_online_cpu(cpu) {
                bts_trace_init_cpu(cpu);
  
-               if (likely(per_cpu(tracer, cpu)))
+               if (likely(per_cpu(hwb_tracer, cpu)))
                        trace_hw_branches_enabled = 1;
        }
        trace_hw_branches_suspended = 0;
@@@ -67,9 -68,9 +68,9 @@@ static void bts_trace_reset(struct trac
  
        get_online_cpus();
        for_each_online_cpu(cpu) {
-               if (likely(per_cpu(tracer, cpu))) {
-                       ds_release_bts(per_cpu(tracer, cpu));
-                       per_cpu(tracer, cpu) = NULL;
+               if (likely(per_cpu(hwb_tracer, cpu))) {
+                       ds_release_bts(per_cpu(hwb_tracer, cpu));
+                       per_cpu(hwb_tracer, cpu) = NULL;
                }
        }
        trace_hw_branches_enabled = 0;
@@@ -83,8 -84,8 +84,8 @@@ static void bts_trace_start(struct trac
  
        get_online_cpus();
        for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_resume_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_resume_bts(per_cpu(hwb_tracer, cpu));
        trace_hw_branches_suspended = 0;
        put_online_cpus();
  }
@@@ -95,8 -96,8 +96,8 @@@ static void bts_trace_stop(struct trace
  
        get_online_cpus();
        for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_suspend_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_suspend_bts(per_cpu(hwb_tracer, cpu));
        trace_hw_branches_suspended = 1;
        put_online_cpus();
  }
@@@ -114,16 -115,16 +115,16 @@@ static int __cpuinit bts_hotcpu_handler
                        bts_trace_init_cpu(cpu);
  
                        if (trace_hw_branches_suspended &&
-                           likely(per_cpu(tracer, cpu)))
-                               ds_suspend_bts(per_cpu(tracer, cpu));
+                           likely(per_cpu(hwb_tracer, cpu)))
+                               ds_suspend_bts(per_cpu(hwb_tracer, cpu));
                }
                break;
  
        case CPU_DOWN_PREPARE:
                /* The notification is sent with interrupts enabled. */
-               if (likely(per_cpu(tracer, cpu))) {
-                       ds_release_bts(per_cpu(tracer, cpu));
-                       per_cpu(tracer, cpu) = NULL;
+               if (likely(per_cpu(hwb_tracer, cpu))) {
+                       ds_release_bts(per_cpu(hwb_tracer, cpu));
+                       per_cpu(hwb_tracer, cpu) = NULL;
                }
        }
  
@@@ -165,7 -166,6 +166,7 @@@ void trace_hw_branch(u64 from, u64 to
        struct ftrace_event_call *call = &event_hw_branch;
        struct trace_array *tr = hw_branch_trace;
        struct ring_buffer_event *event;
 +      struct ring_buffer *buf;
        struct hw_branch_entry *entry;
        unsigned long irq1;
        int cpu;
        if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
                goto out;
  
 -      event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
 +      buf = tr->buffer;
 +      event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
                                          sizeof(*entry), 0, 0);
        if (!event)
                goto out;
        entry->ent.type = TRACE_HW_BRANCHES;
        entry->from = from;
        entry->to   = to;
 -      if (!filter_check_discard(call, entry, tr->buffer, event))
 -              trace_buffer_unlock_commit(tr, event, 0, 0);
 +      if (!filter_check_discard(call, entry, buf, event))
 +              trace_buffer_unlock_commit(buf, event, 0, 0);
  
   out:
        atomic_dec(&tr->data[cpu]->disabled);
@@@ -258,8 -257,8 +259,8 @@@ static void trace_bts_prepare(struct tr
  
        get_online_cpus();
        for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_suspend_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_suspend_bts(per_cpu(hwb_tracer, cpu));
        /*
         * We need to collect the trace on the respective cpu since ftrace
         * implicitly adds the record for the current cpu.
        on_each_cpu(trace_bts_cpu, iter->tr, 1);
  
        for_each_online_cpu(cpu)
-               if (likely(per_cpu(tracer, cpu)))
-                       ds_resume_bts(per_cpu(tracer, cpu));
+               if (likely(per_cpu(hwb_tracer, cpu)))
+                       ds_resume_bts(per_cpu(hwb_tracer, cpu));
        put_online_cpus();
  }
  
diff --combined mm/slab.c
index a6c9166996a9f389e72f3c71a2df7bacc937fe6f,211b1746c63ca7c723921b6b3ab6ba17a9a907c1..29b09599af7cb4510c37e02d873eab4e2cd52c79
+++ b/mm/slab.c
@@@ -604,26 -604,6 +604,26 @@@ static struct kmem_cache cache_cache = 
  
  #define BAD_ALIEN_MAGIC 0x01020304ul
  
 +/*
 + * chicken and egg problem: delay the per-cpu array allocation
 + * until the general caches are up.
 + */
 +static enum {
 +      NONE,
 +      PARTIAL_AC,
 +      PARTIAL_L3,
 +      EARLY,
 +      FULL
 +} g_cpucache_up;
 +
 +/*
 + * used by boot code to determine if it can use slab based allocator
 + */
 +int slab_is_available(void)
 +{
 +      return g_cpucache_up >= EARLY;
 +}
 +
  #ifdef CONFIG_LOCKDEP
  
  /*
  static struct lock_class_key on_slab_l3_key;
  static struct lock_class_key on_slab_alc_key;
  
 -static inline void init_lock_keys(void)
 -
 +static void init_node_lock_keys(int q)
  {
 -      int q;
        struct cache_sizes *s = malloc_sizes;
  
 -      while (s->cs_size != ULONG_MAX) {
 -              for_each_node(q) {
 -                      struct array_cache **alc;
 -                      int r;
 -                      struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
 -                      if (!l3 || OFF_SLAB(s->cs_cachep))
 -                              continue;
 -                      lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
 -                      alc = l3->alien;
 -                      /*
 -                       * FIXME: This check for BAD_ALIEN_MAGIC
 -                       * should go away when common slab code is taught to
 -                       * work even without alien caches.
 -                       * Currently, non NUMA code returns BAD_ALIEN_MAGIC
 -                       * for alloc_alien_cache,
 -                       */
 -                      if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
 -                              continue;
 -                      for_each_node(r) {
 -                              if (alc[r])
 -                                      lockdep_set_class(&alc[r]->lock,
 -                                           &on_slab_alc_key);
 -                      }
 +      if (g_cpucache_up != FULL)
 +              return;
 +
 +      for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
 +              struct array_cache **alc;
 +              struct kmem_list3 *l3;
 +              int r;
 +
 +              l3 = s->cs_cachep->nodelists[q];
 +              if (!l3 || OFF_SLAB(s->cs_cachep))
 +                      return;
 +              lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
 +              alc = l3->alien;
 +              /*
 +               * FIXME: This check for BAD_ALIEN_MAGIC
 +               * should go away when common slab code is taught to
 +               * work even without alien caches.
 +               * Currently, non NUMA code returns BAD_ALIEN_MAGIC
 +               * for alloc_alien_cache,
 +               */
 +              if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
 +                      return;
 +              for_each_node(r) {
 +                      if (alc[r])
 +                              lockdep_set_class(&alc[r]->lock,
 +                                      &on_slab_alc_key);
                }
 -              s++;
        }
  }
 +
 +static inline void init_lock_keys(void)
 +{
 +      int node;
 +
 +      for_each_node(node)
 +              init_node_lock_keys(node);
 +}
  #else
 +static void init_node_lock_keys(int q)
 +{
 +}
 +
  static inline void init_lock_keys(void)
  {
  }
  static DEFINE_MUTEX(cache_chain_mutex);
  static struct list_head cache_chain;
  
- static DEFINE_PER_CPU(struct delayed_work, reap_work);
 -/*
 - * chicken and egg problem: delay the per-cpu array allocation
 - * until the general caches are up.
 - */
 -static enum {
 -      NONE,
 -      PARTIAL_AC,
 -      PARTIAL_L3,
 -      EARLY,
 -      FULL
 -} g_cpucache_up;
 -
 -/*
 - * used by boot code to determine if it can use slab based allocator
 - */
 -int slab_is_available(void)
 -{
 -      return g_cpucache_up >= EARLY;
 -}
 -
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
  
  static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
  {
@@@ -838,7 -826,7 +838,7 @@@ __setup("noaliencache", noaliencache_se
   * objects freed on different nodes from which they were allocated) and the
   * flushing of remote pcps by calling drain_node_pages.
   */
- static DEFINE_PER_CPU(unsigned long, reap_node);
+ static DEFINE_PER_CPU(unsigned long, slab_reap_node);
  
  static void init_reap_node(int cpu)
  {
        if (node == MAX_NUMNODES)
                node = first_node(node_online_map);
  
-       per_cpu(reap_node, cpu) = node;
+       per_cpu(slab_reap_node, cpu) = node;
  }
  
  static void next_reap_node(void)
  {
-       int node = __get_cpu_var(reap_node);
+       int node = __get_cpu_var(slab_reap_node);
  
        node = next_node(node, node_online_map);
        if (unlikely(node >= MAX_NUMNODES))
                node = first_node(node_online_map);
-       __get_cpu_var(reap_node) = node;
+       __get_cpu_var(slab_reap_node) = node;
  }
  
  #else
   */
  static void __cpuinit start_cpu_timer(int cpu)
  {
-       struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
+       struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
  
        /*
         * When this gets called from do_initcalls via cpucache_init(),
@@@ -1039,7 -1027,7 +1039,7 @@@ static void __drain_alien_cache(struct 
   */
  static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
  {
-       int node = __get_cpu_var(reap_node);
+       int node = __get_cpu_var(slab_reap_node);
  
        if (l3->alien) {
                struct array_cache *ac = l3->alien[node];
@@@ -1266,8 -1254,6 +1266,8 @@@ static int __cpuinit cpuup_prepare(lon
                kfree(shared);
                free_alien_cache(alien);
        }
 +      init_node_lock_keys(node);
 +
        return 0;
  bad:
        cpuup_canceled(cpu);
@@@ -1300,9 -1286,9 +1300,9 @@@ static int __cpuinit cpuup_callback(str
                 * anything expensive but will only modify reap_work
                 * and reschedule the timer.
                */
-               cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
+               cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
                /* Now the cache_reaper is guaranteed to be not running. */
-               per_cpu(reap_work, cpu).work.func = NULL;
+               per_cpu(slab_reap_work, cpu).work.func = NULL;
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
@@@ -3117,19 -3103,13 +3117,19 @@@ static inline void *____cache_alloc(str
        } else {
                STATS_INC_ALLOCMISS(cachep);
                objp = cache_alloc_refill(cachep, flags);
 +              /*
 +               * the 'ac' may be updated by cache_alloc_refill(),
 +               * and kmemleak_erase() requires its correct value.
 +               */
 +              ac = cpu_cache_get(cachep);
        }
        /*
         * To avoid a false negative, if an object that is in one of the
         * per-CPU caches is leaked, we need to make sure kmemleak doesn't
         * treat the array pointers as a reference to the object.
         */
 -      kmemleak_erase(&ac->entry[ac->avail]);
 +      if (objp)
 +              kmemleak_erase(&ac->entry[ac->avail]);
        return objp;
  }
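
The ____cache_alloc() hunk above re-reads ac with cpu_cache_get() after cache_alloc_refill(), because the refill path may replace the per-cpu array_cache and leave the cached local pointing at freed memory by the time kmemleak_erase() uses it; it also skips kmemleak_erase() when no object was obtained. The general hazard, a helper that can reallocate the object a cached local points into, shown in self-contained C with realloc() as the stand-in:

#include <stdlib.h>

struct vec { int *items; size_t len, cap; };

int vec_grow(struct vec *v)
{
        size_t ncap = v->cap ? v->cap * 2 : 8;
        int *p = realloc(v->items, ncap * sizeof(*p));

        if (!p)
                return -1;
        v->items = p;           /* any old copy of v->items is now stale */
        v->cap = ncap;
        return 0;
}

int vec_push(struct vec *v, int x)
{
        int *items = v->items;          /* cached local, like 'ac' above */

        if (v->len == v->cap) {
                if (vec_grow(v))
                        return -1;
                items = v->items;       /* must re-read after the refill */
        }
        items[v->len++] = x;
        return 0;
}
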
  
@@@ -3326,7 -3306,7 +3326,7 @@@ __cache_alloc_node(struct kmem_cache *c
        cache_alloc_debugcheck_before(cachep, flags);
        local_irq_save(save_flags);
  
 -      if (unlikely(nodeid == -1))
 +      if (nodeid == -1)
                nodeid = numa_node_id();
  
        if (unlikely(!cachep->nodelists[nodeid])) {
diff --combined mm/vmalloc.c
index 0f551a4a44cddc7a042d47bbcd85c7126569ee69,b65cfe44a5629e25f33dfcb7de95bcbbbda27b3a..9b08d790df6fe441a2b8c2002ed9f0e1a4677641
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/highmem.h>
 +#include <linux/sched.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
  #include <linux/rcupdate.h>
  #include <linux/pfn.h>
  #include <linux/kmemleak.h>
 -#include <linux/highmem.h>
  #include <asm/atomic.h>
  #include <asm/uaccess.h>
  #include <asm/tlbflush.h>
 +#include <asm/shmparam.h>
  
  
  /*** Page table manipulation functions ***/
@@@ -761,7 -760,7 +761,7 @@@ static struct vmap_block *new_vmap_bloc
        spin_lock(&vbq->lock);
        list_add(&vb->free_list, &vbq->free);
        spin_unlock(&vbq->lock);
-       put_cpu_var(vmap_cpu_blocks);
+       put_cpu_var(vmap_block_queue);
  
        return vb;
  }
@@@ -826,7 -825,7 +826,7 @@@ again
                }
                spin_unlock(&vb->lock);
        }
-       put_cpu_var(vmap_cpu_blocks);
+       put_cpu_var(vmap_block_queue);
        rcu_read_unlock();
  
        if (!addr) {
@@@ -1157,11 -1156,12 +1157,11 @@@ static void insert_vmalloc_vm(struct vm
  }
  
  static struct vm_struct *__get_vm_area_node(unsigned long size,
 -              unsigned long flags, unsigned long start, unsigned long end,
 -              int node, gfp_t gfp_mask, void *caller)
 +              unsigned long align, unsigned long flags, unsigned long start,
 +              unsigned long end, int node, gfp_t gfp_mask, void *caller)
  {
        static struct vmap_area *va;
        struct vm_struct *area;
 -      unsigned long align = 1;
  
        BUG_ON(in_interrupt());
        if (flags & VM_IOREMAP) {
  struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
                                unsigned long start, unsigned long end)
  {
 -      return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
 +      return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
                                                __builtin_return_address(0));
  }
  EXPORT_SYMBOL_GPL(__get_vm_area);
@@@ -1210,7 -1210,7 +1210,7 @@@ struct vm_struct *__get_vm_area_caller(
                                       unsigned long start, unsigned long end,
                                       void *caller)
  {
 -      return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
 +      return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
                                  caller);
  }
  
   */
  struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
  {
 -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
 +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
                                -1, GFP_KERNEL, __builtin_return_address(0));
  }
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
                                void *caller)
  {
 -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
 +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
                                                -1, GFP_KERNEL, caller);
  }
  
  struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
                                   int node, gfp_t gfp_mask)
  {
 -      return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
 -                                gfp_mask, __builtin_return_address(0));
 +      return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
 +                                node, gfp_mask, __builtin_return_address(0));
  }
  
  static struct vm_struct *find_vm_area(const void *addr)
@@@ -1403,8 -1403,7 +1403,8 @@@ void *vmap(struct page **pages, unsigne
  }
  EXPORT_SYMBOL(vmap);
  
 -static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 +static void *__vmalloc_node(unsigned long size, unsigned long align,
 +                          gfp_t gfp_mask, pgprot_t prot,
                            int node, void *caller);
  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                 pgprot_t prot, int node, void *caller)
        area->nr_pages = nr_pages;
        /* Please note that the recursion is strictly bounded. */
        if (array_size > PAGE_SIZE) {
 -              pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
 +              pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
                                PAGE_KERNEL, node, caller);
                area->flags |= VM_VPAGES;
        } else {
@@@ -1477,7 -1476,6 +1477,7 @@@ void *__vmalloc_area(struct vm_struct *
  /**
   *    __vmalloc_node  -  allocate virtually contiguous memory
   *    @size:          allocation size
 + *    @align:         desired alignment
   *    @gfp_mask:      flags for the page level allocator
   *    @prot:          protection mask for the allocated pages
   *    @node:          node to use for allocation or -1
   *    allocator with @gfp_mask flags.  Map them into contiguous
   *    kernel virtual space, using a pagetable protection of @prot.
   */
 -static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 -                                              int node, void *caller)
 +static void *__vmalloc_node(unsigned long size, unsigned long align,
 +                          gfp_t gfp_mask, pgprot_t prot,
 +                          int node, void *caller)
  {
        struct vm_struct *area;
        void *addr;
        if (!size || (size >> PAGE_SHIFT) > totalram_pages)
                return NULL;
  
 -      area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
 -                                              node, gfp_mask, caller);
 +      area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
 +                                VMALLOC_END, node, gfp_mask, caller);
  
        if (!area)
                return NULL;
  
  void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
  {
 -      return __vmalloc_node(size, gfp_mask, prot, -1,
 +      return __vmalloc_node(size, 1, gfp_mask, prot, -1,
                                __builtin_return_address(0));
  }
  EXPORT_SYMBOL(__vmalloc);
   */
  void *vmalloc(unsigned long size)
  {
 -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
 +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
                                        -1, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(vmalloc);
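
__get_vm_area_node() and __vmalloc_node() above grow an explicit align argument: plain vmalloc() and friends pass 1 (no extra constraint), while vmalloc_user() asks for SHMLBA so the area can later be mapped to userspace with the alignment shared mappings need. The underlying arithmetic is the usual power-of-two round-up, shown as plain C with an invented helper name:

#include <assert.h>
#include <stdint.h>

/* Round addr up to the next multiple of align; align must be a power of two. */
static inline uintptr_t align_up(uintptr_t addr, uintptr_t align)
{
        return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
        assert(align_up(0x1001, 0x1000) == 0x2000);
        assert(align_up(0x2000, 0x1000) == 0x2000);     /* already aligned */
        return 0;
}
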
@@@ -1552,8 -1549,7 +1552,8 @@@ void *vmalloc_user(unsigned long size
        struct vm_struct *area;
        void *ret;
  
 -      ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
 +      ret = __vmalloc_node(size, SHMLBA,
 +                           GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                             PAGE_KERNEL, -1, __builtin_return_address(0));
        if (ret) {
                area = find_vm_area(ret);
@@@ -1576,7 -1572,7 +1576,7 @@@ EXPORT_SYMBOL(vmalloc_user)
   */
  void *vmalloc_node(unsigned long size, int node)
  {
 -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
 +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
                                        node, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(vmalloc_node);
  
  void *vmalloc_exec(unsigned long size)
  {
 -      return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
 +      return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
                              -1, __builtin_return_address(0));
  }
  
   */
  void *vmalloc_32(unsigned long size)
  {
 -      return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
 +      return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
                              -1, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(vmalloc_32);
@@@ -1637,7 -1633,7 +1637,7 @@@ void *vmalloc_32_user(unsigned long siz
        struct vm_struct *area;
        void *ret;
  
 -      ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
 +      ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
                             -1, __builtin_return_address(0));
        if (ret) {
                area = find_vm_area(ret);