Merge branch 'linus' into perf/core
author Ingo Molnar <[email protected]>
Thu, 8 Apr 2010 11:36:36 +0000 (13:36 +0200)
committer Ingo Molnar <[email protected]>
Thu, 8 Apr 2010 11:37:18 +0000 (13:37 +0200)
Semantic conflict: arch/x86/kernel/cpu/perf_event_intel_ds.c

Merge reason: pick up latest fixes, fix the conflict

Signed-off-by: Ingo Molnar <[email protected]>
MAINTAINERS
arch/x86/Kconfig
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/ptrace.c
include/linux/mm.h
kernel/fork.c
kernel/perf_event.c
kernel/sched.c
kernel/trace/trace_selftest.c

diff --combined MAINTAINERS
index 6c858e89c7d01c2fc5849366b49ed4c38d058075,7a9ccda2a3070183af6f19e361181ed2bc1bade2..c3e9c3633b75ca89bee21c126339be1053717770
  S:    Odd Fixes
  F:    drivers/char/hvc_*
  
- VIRTIO CONSOLE DRIVER
- M:    Amit Shah <[email protected]>
- L:    [email protected]
- S:    Maintained
- F:    drivers/char/virtio_console.c
  iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
  M:    Peter Jones <[email protected]>
  M:    Konrad Rzeszutek Wilk <[email protected]>
@@@ -4349,13 -4343,13 +4343,13 @@@ M:   Paul Mackerras <[email protected]
  M:    Ingo Molnar <[email protected]>
  M:    Arnaldo Carvalho de Melo <[email protected]>
  S:    Supported
 -F:    kernel/perf_event.c
 +F:    kernel/perf_event*.c
  F:    include/linux/perf_event.h
 -F:    arch/*/kernel/perf_event.c
 -F:    arch/*/kernel/*/perf_event.c
 -F:    arch/*/kernel/*/*/perf_event.c
 +F:    arch/*/kernel/perf_event*.c
 +F:    arch/*/kernel/*/perf_event*.c
 +F:    arch/*/kernel/*/*/perf_event*.c
  F:    arch/*/include/asm/perf_event.h
 -F:    arch/*/lib/perf_event.c
 +F:    arch/*/lib/perf_event*.c
  F:    arch/*/kernel/perf_callchain.c
  F:    tools/perf/
  
@@@ -5971,6 -5965,13 +5965,13 @@@ S:    Maintaine
  F:    Documentation/filesystems/vfat.txt
  F:    fs/fat/
  
+ VIRTIO CONSOLE DRIVER
+ M:    Amit Shah <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/char/virtio_console.c
+ F:    include/linux/virtio_console.h
  VIRTIO HOST (VHOST)
  M:    "Michael S. Tsirkin" <[email protected]>
  L:    [email protected]
diff --combined arch/x86/Kconfig
index 7191b6eb16d8799277a52064c26f05de40b0bba9,9458685902bddecc3f221012389085084bfb23ef..97a95dfd118110f908e8e916897458d885a6ae94
@@@ -58,9 -58,6 +58,9 @@@ config X8
        select HAVE_ARCH_KMEMCHECK
        select HAVE_USER_RETURN_NOTIFIER
  
 +config INSTRUCTION_DECODER
 +      def_bool (KPROBES || PERF_EVENTS)
 +
  config OUTPUT_FORMAT
        string
        default "elf32-i386" if X86_32
@@@ -1219,8 -1216,8 +1219,8 @@@ config NUMA_EM
  
  config NODES_SHIFT
        int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-       range 1 9
-       default "9" if MAXSMP
+       range 1 10
+       default "10" if MAXSMP
        default "6" if X86_64
        default "4" if X86_NUMAQ
        default "3"
diff --combined arch/x86/kernel/cpu/perf_event.c
index 353a174adb4478bd7ece980169d706cb0c7143d7,db5bdc8addf82f1df406488025a5b4877ed53419..626154a9f535b28d53140b60e5b360a6a1a0db75
@@@ -21,6 -21,7 +21,7 @@@
  #include <linux/kdebug.h>
  #include <linux/sched.h>
  #include <linux/uaccess.h>
+ #include <linux/slab.h>
  #include <linux/highmem.h>
  #include <linux/cpu.h>
  #include <linux/bitops.h>
  #include <asm/nmi.h>
  #include <asm/compat.h>
  
 -static u64 perf_event_mask __read_mostly;
 +#if 0
 +#undef wrmsrl
 +#define wrmsrl(msr, val)                                      \
 +do {                                                          \
 +      trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
 +                      (unsigned long)(val));                  \
 +      native_write_msr((msr), (u32)((u64)(val)),              \
 +                      (u32)((u64)(val) >> 32));               \
 +} while (0)
 +#endif
  
 -/* The maximal number of PEBS events: */
 -#define MAX_PEBS_EVENTS       4
 +/*
 + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 + */
 +static unsigned long
 +copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 +{
 +      unsigned long offset, addr = (unsigned long)from;
 +      int type = in_nmi() ? KM_NMI : KM_IRQ0;
 +      unsigned long size, len = 0;
 +      struct page *page;
 +      void *map;
 +      int ret;
  
 -/* The size of a BTS record in bytes: */
 -#define BTS_RECORD_SIZE               24
 +      do {
 +              ret = __get_user_pages_fast(addr, 1, 0, &page);
 +              if (!ret)
 +                      break;
  
 -/* The size of a per-cpu BTS buffer in bytes: */
 -#define BTS_BUFFER_SIZE               (BTS_RECORD_SIZE * 2048)
 +              offset = addr & (PAGE_SIZE - 1);
 +              size = min(PAGE_SIZE - offset, n - len);
  
 -/* The BTS overflow threshold in bytes from the end of the buffer: */
 -#define BTS_OVFL_TH           (BTS_RECORD_SIZE * 128)
 +              map = kmap_atomic(page, type);
 +              memcpy(to, map+offset, size);
 +              kunmap_atomic(map, type);
 +              put_page(page);
  
 +              len  += size;
 +              to   += size;
 +              addr += size;
  
 -/*
 - * Bits in the debugctlmsr controlling branch tracing.
 - */
 -#define X86_DEBUGCTL_TR                       (1 << 6)
 -#define X86_DEBUGCTL_BTS              (1 << 7)
 -#define X86_DEBUGCTL_BTINT            (1 << 8)
 -#define X86_DEBUGCTL_BTS_OFF_OS               (1 << 9)
 -#define X86_DEBUGCTL_BTS_OFF_USR      (1 << 10)
 +      } while (len < n);
  
 -/*
 - * A debug store configuration.
 - *
 - * We only support architectures that use 64bit fields.
 - */
 -struct debug_store {
 -      u64     bts_buffer_base;
 -      u64     bts_index;
 -      u64     bts_absolute_maximum;
 -      u64     bts_interrupt_threshold;
 -      u64     pebs_buffer_base;
 -      u64     pebs_index;
 -      u64     pebs_absolute_maximum;
 -      u64     pebs_interrupt_threshold;
 -      u64     pebs_event_reset[MAX_PEBS_EVENTS];
 -};
 +      return len;
 +}
  
  struct event_constraint {
        union {
@@@ -93,39 -89,18 +94,39 @@@ struct amd_nb 
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
  };
  
 +#define MAX_LBR_ENTRIES               16
 +
  struct cpu_hw_events {
 +      /*
 +       * Generic x86 PMC bits
 +       */
        struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 -      unsigned long           interrupts;
        int                     enabled;
 -      struct debug_store      *ds;
  
        int                     n_events;
        int                     n_added;
        int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64                     tags[X86_PMC_IDX_MAX];
        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 +
 +      /*
 +       * Intel DebugStore bits
 +       */
 +      struct debug_store      *ds;
 +      u64                     pebs_enabled;
 +
 +      /*
 +       * Intel LBR bits
 +       */
 +      int                             lbr_users;
 +      void                            *lbr_context;
 +      struct perf_branch_stack        lbr_stack;
 +      struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
 +
 +      /*
 +       * AMD specific bits
 +       */
        struct amd_nb           *amd_nb;
  };
  
  #define EVENT_CONSTRAINT(c, n, m)     \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
  
 +/*
 + * Constraint on the Event code.
 + */
  #define INTEL_EVENT_CONSTRAINT(c, n)  \
 -      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 +      EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
  
 +/*
 + * Constraint on the Event code + UMask + fixed-mask
 + *
 + * filter mask to validate fixed counter events.
 + * the following filters disqualify for fixed counters:
 + *  - inv
 + *  - edge
 + *  - cnt-mask
 + *  The other filters are supported by fixed counters.
 + *  The any-thread option is supported starting with v3.
 + */
  #define FIXED_EVENT_CONSTRAINT(c, n)  \
 -      EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 +      EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
 +
 +/*
 + * Constraint on the Event code + UMask
 + */
 +#define PEBS_EVENT_CONSTRAINT(c, n)   \
 +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
  
  #define EVENT_CONSTRAINT_END          \
        EVENT_CONSTRAINT(0, 0, 0)
  #define for_each_event_constraint(e, c)       \
        for ((e) = (c); (e)->cmask; (e)++)
  
 +union perf_capabilities {
 +      struct {
 +              u64     lbr_format    : 6;
 +              u64     pebs_trap     : 1;
 +              u64     pebs_arch_reg : 1;
 +              u64     pebs_format   : 4;
 +              u64     smm_freeze    : 1;
 +      };
 +      u64     capabilities;
 +};
 +
  /*
   * struct x86_pmu - generic x86 pmu
   */
  struct x86_pmu {
 +      /*
 +       * Generic x86 PMC bits
 +       */
        const char      *name;
        int             version;
        int             (*handle_irq)(struct pt_regs *);
        void            (*disable_all)(void);
 -      void            (*enable_all)(void);
 +      void            (*enable_all)(int added);
        void            (*enable)(struct perf_event *);
        void            (*disable)(struct perf_event *);
 +      int             (*hw_config)(struct perf_event *event);
 +      int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
        unsigned        eventsel;
        unsigned        perfctr;
        u64             (*event_map)(int);
 -      u64             (*raw_event)(u64);
        int             max_events;
 -      int             num_events;
 -      int             num_events_fixed;
 -      int             event_bits;
 -      u64             event_mask;
 +      int             num_counters;
 +      int             num_counters_fixed;
 +      int             cntval_bits;
 +      u64             cntval_mask;
        int             apic;
        u64             max_period;
 -      u64             intel_ctrl;
 -      void            (*enable_bts)(u64 config);
 -      void            (*disable_bts)(void);
 -
        struct event_constraint *
                        (*get_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
 +      void            (*quirks)(void);
  
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
        void            (*cpu_dying)(int cpu);
        void            (*cpu_dead)(int cpu);
 +
 +      /*
 +       * Intel Arch Perfmon v2+
 +       */
 +      u64                     intel_ctrl;
 +      union perf_capabilities intel_cap;
 +
 +      /*
 +       * Intel DebugStore bits
 +       */
 +      int             bts, pebs;
 +      int             pebs_record_size;
 +      void            (*drain_pebs)(struct pt_regs *regs);
 +      struct event_constraint *pebs_constraints;
 +
 +      /*
 +       * Intel LBR
 +       */
 +      unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 +      int             lbr_nr;                    /* hardware stack size */
  };
  
  static struct x86_pmu x86_pmu __read_mostly;
@@@ -275,7 -198,7 +276,7 @@@ static u6
  x86_perf_event_update(struct perf_event *event)
  {
        struct hw_perf_event *hwc = &event->hw;
 -      int shift = 64 - x86_pmu.event_bits;
 +      int shift = 64 - x86_pmu.cntval_bits;
        u64 prev_raw_count, new_raw_count;
        int idx = hwc->idx;
        s64 delta;
@@@ -318,32 -241,33 +319,32 @@@ again
  static atomic_t active_events;
  static DEFINE_MUTEX(pmc_reserve_mutex);
  
 +#ifdef CONFIG_X86_LOCAL_APIC
 +
  static bool reserve_pmc_hardware(void)
  {
 -#ifdef CONFIG_X86_LOCAL_APIC
        int i;
  
        if (nmi_watchdog == NMI_LOCAL_APIC)
                disable_lapic_nmi_watchdog();
  
 -      for (i = 0; i < x86_pmu.num_events; i++) {
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                        goto perfctr_fail;
        }
  
 -      for (i = 0; i < x86_pmu.num_events; i++) {
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
                        goto eventsel_fail;
        }
 -#endif
  
        return true;
  
 -#ifdef CONFIG_X86_LOCAL_APIC
  eventsel_fail:
        for (i--; i >= 0; i--)
                release_evntsel_nmi(x86_pmu.eventsel + i);
  
 -      i = x86_pmu.num_events;
 +      i = x86_pmu.num_counters;
  
  perfctr_fail:
        for (i--; i >= 0; i--)
                enable_lapic_nmi_watchdog();
  
        return false;
 -#endif
  }
  
  static void release_pmc_hardware(void)
  {
 -#ifdef CONFIG_X86_LOCAL_APIC
        int i;
  
 -      for (i = 0; i < x86_pmu.num_events; i++) {
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
                release_perfctr_nmi(x86_pmu.perfctr + i);
                release_evntsel_nmi(x86_pmu.eventsel + i);
        }
  
        if (nmi_watchdog == NMI_LOCAL_APIC)
                enable_lapic_nmi_watchdog();
 -#endif
 -}
 -
 -static inline bool bts_available(void)
 -{
 -      return x86_pmu.enable_bts != NULL;
 -}
 -
 -static void init_debug_store_on_cpu(int cpu)
 -{
 -      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 -
 -      if (!ds)
 -              return;
 -
 -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 -                   (u32)((u64)(unsigned long)ds),
 -                   (u32)((u64)(unsigned long)ds >> 32));
 -}
 -
 -static void fini_debug_store_on_cpu(int cpu)
 -{
 -      if (!per_cpu(cpu_hw_events, cpu).ds)
 -              return;
 -
 -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 -}
 -
 -static void release_bts_hardware(void)
 -{
 -      int cpu;
 -
 -      if (!bts_available())
 -              return;
 -
 -      get_online_cpus();
 -
 -      for_each_online_cpu(cpu)
 -              fini_debug_store_on_cpu(cpu);
 -
 -      for_each_possible_cpu(cpu) {
 -              struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 -
 -              if (!ds)
 -                      continue;
 -
 -              per_cpu(cpu_hw_events, cpu).ds = NULL;
 -
 -              kfree((void *)(unsigned long)ds->bts_buffer_base);
 -              kfree(ds);
 -      }
 -
 -      put_online_cpus();
  }
  
 -static int reserve_bts_hardware(void)
 -{
 -      int cpu, err = 0;
 -
 -      if (!bts_available())
 -              return 0;
 -
 -      get_online_cpus();
 -
 -      for_each_possible_cpu(cpu) {
 -              struct debug_store *ds;
 -              void *buffer;
 -
 -              err = -ENOMEM;
 -              buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
 -              if (unlikely(!buffer))
 -                      break;
 -
 -              ds = kzalloc(sizeof(*ds), GFP_KERNEL);
 -              if (unlikely(!ds)) {
 -                      kfree(buffer);
 -                      break;
 -              }
 +#else
  
 -              ds->bts_buffer_base = (u64)(unsigned long)buffer;
 -              ds->bts_index = ds->bts_buffer_base;
 -              ds->bts_absolute_maximum =
 -                      ds->bts_buffer_base + BTS_BUFFER_SIZE;
 -              ds->bts_interrupt_threshold =
 -                      ds->bts_absolute_maximum - BTS_OVFL_TH;
 +static bool reserve_pmc_hardware(void) { return true; }
 +static void release_pmc_hardware(void) {}
  
 -              per_cpu(cpu_hw_events, cpu).ds = ds;
 -              err = 0;
 -      }
 -
 -      if (err)
 -              release_bts_hardware();
 -      else {
 -              for_each_online_cpu(cpu)
 -                      init_debug_store_on_cpu(cpu);
 -      }
 -
 -      put_online_cpus();
 +#endif
  
 -      return err;
 -}
 +static int reserve_ds_buffers(void);
 +static void release_ds_buffers(void);
  
  static void hw_perf_event_destroy(struct perf_event *event)
  {
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
                release_pmc_hardware();
 -              release_bts_hardware();
 +              release_ds_buffers();
                mutex_unlock(&pmc_reserve_mutex);
        }
  }
@@@ -425,28 -441,6 +426,28 @@@ set_ext_hw_attr(struct hw_perf_event *h
        return 0;
  }
  
 +static int x86_pmu_hw_config(struct perf_event *event)
 +{
 +      /*
 +       * Generate PMC IRQs:
 +       * (keep 'enabled' bit clear for now)
 +       */
 +      event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
 +
 +      /*
 +       * Count user and OS events unless requested not to
 +       */
 +      if (!event->attr.exclude_user)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
 +      if (!event->attr.exclude_kernel)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 +
 +      if (event->attr.type == PERF_TYPE_RAW)
 +              event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 +
 +      return 0;
 +}
 +
  /*
   * Setup the hardware configuration for a given attr_type
   */
@@@ -466,11 -460,8 +467,11 @@@ static int __hw_perf_event_init(struct 
                if (atomic_read(&active_events) == 0) {
                        if (!reserve_pmc_hardware())
                                err = -EBUSY;
 -                      else
 -                              err = reserve_bts_hardware();
 +                      else {
 +                              err = reserve_ds_buffers();
 +                              if (err)
 +                                      release_pmc_hardware();
 +                      }
                }
                if (!err)
                        atomic_inc(&active_events);
  
        event->destroy = hw_perf_event_destroy;
  
 -      /*
 -       * Generate PMC IRQs:
 -       * (keep 'enabled' bit clear for now)
 -       */
 -      hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 -
        hwc->idx = -1;
        hwc->last_cpu = -1;
        hwc->last_tag = ~0ULL;
  
 -      /*
 -       * Count user and OS events unless requested not to.
 -       */
 -      if (!attr->exclude_user)
 -              hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
 -      if (!attr->exclude_kernel)
 -              hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 +      /* Processor specifics */
 +      err = x86_pmu.hw_config(event);
 +      if (err)
 +              return err;
  
        if (!hwc->sample_period) {
                hwc->sample_period = x86_pmu.max_period;
                        return -EOPNOTSUPP;
        }
  
 -      /*
 -       * Raw hw_event type provide the config in the hw_event structure
 -       */
 -      if (attr->type == PERF_TYPE_RAW) {
 -              hwc->config |= x86_pmu.raw_event(attr->config);
 -              if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
 -                  perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 -                      return -EACCES;
 +      if (attr->type == PERF_TYPE_RAW)
                return 0;
 -      }
  
        if (attr->type == PERF_TYPE_HW_CACHE)
                return set_ext_hw_attr(hwc, attr);
        if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
            (hwc->sample_period == 1)) {
                /* BTS is not supported by this architecture. */
 -              if (!bts_available())
 +              if (!x86_pmu.bts)
                        return -EOPNOTSUPP;
  
                /* BTS is currently only allowed for user-mode. */
 -              if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
 +              if (!attr->exclude_kernel)
                        return -EOPNOTSUPP;
        }
  
@@@ -549,7 -557,7 +550,7 @@@ static void x86_pmu_disable_all(void
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
  
 -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                u64 val;
  
                if (!test_bit(idx, cpuc->active_mask))
@@@ -579,12 -587,12 +580,12 @@@ void hw_perf_disable(void
        x86_pmu.disable_all();
  }
  
 -static void x86_pmu_enable_all(void)
 +static void x86_pmu_enable_all(int added)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
  
 -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                struct perf_event *event = cpuc->events[idx];
                u64 val;
  
@@@ -659,14 -667,14 +660,14 @@@ static int x86_schedule_events(struct c
         * assign events to counters starting with most
         * constrained events.
         */
 -      wmax = x86_pmu.num_events;
 +      wmax = x86_pmu.num_counters;
  
        /*
         * when fixed event counters are present,
         * wmax is incremented by 1 to account
         * for one more choice
         */
 -      if (x86_pmu.num_events_fixed)
 +      if (x86_pmu.num_counters_fixed)
                wmax++;
  
        for (w = 1, num = n; num && w <= wmax; w++) {
@@@ -716,7 -724,7 +717,7 @@@ static int collect_events(struct cpu_hw
        struct perf_event *event;
        int n, max_count;
  
 -      max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
 +      max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
  
        /* current number of events already accepted */
        n = cpuc->n_events;
@@@ -787,7 -795,7 +788,7 @@@ void hw_perf_enable(void
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct perf_event *event;
        struct hw_perf_event *hwc;
 -      int i;
 +      int i, added = cpuc->n_added;
  
        if (!x86_pmu_initialized())
                return;
        cpuc->enabled = 1;
        barrier();
  
 -      x86_pmu.enable_all();
 +      x86_pmu.enable_all(added);
  }
  
  static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
  {
 -      (void)checking_wrmsrl(hwc->config_base + hwc->idx,
 +      wrmsrl(hwc->config_base + hwc->idx,
                              hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
  }
  
  static inline void x86_pmu_disable_event(struct perf_event *event)
  {
        struct hw_perf_event *hwc = &event->hw;
 -      (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 +
 +      wrmsrl(hwc->config_base + hwc->idx, hwc->config);
  }
  
  static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@@ -867,7 -874,7 +868,7 @@@ x86_perf_event_set_period(struct perf_e
        struct hw_perf_event *hwc = &event->hw;
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
 -      int err, ret = 0, idx = hwc->idx;
 +      int ret = 0, idx = hwc->idx;
  
        if (idx == X86_PMC_IDX_FIXED_BTS)
                return 0;
         */
        atomic64_set(&hwc->prev_count, (u64)-left);
  
 -      err = checking_wrmsrl(hwc->event_base + idx,
 -                           (u64)(-left) & x86_pmu.event_mask);
 +      wrmsrl(hwc->event_base + idx,
 +                      (u64)(-left) & x86_pmu.cntval_mask);
  
        perf_event_update_userpage(event);
  
@@@ -943,7 -950,7 +944,7 @@@ static int x86_pmu_enable(struct perf_e
        if (n < 0)
                return n;
  
 -      ret = x86_schedule_events(cpuc, n, assign);
 +      ret = x86_pmu.schedule_events(cpuc, n, assign);
        if (ret)
                return ret;
        /*
@@@ -984,12 -991,11 +985,12 @@@ static void x86_pmu_unthrottle(struct p
  void perf_event_print_debug(void)
  {
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 +      u64 pebs;
        struct cpu_hw_events *cpuc;
        unsigned long flags;
        int cpu, idx;
  
 -      if (!x86_pmu.num_events)
 +      if (!x86_pmu.num_counters)
                return;
  
        local_irq_save(flags);
                rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
                rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
 +              rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
  
                pr_info("\n");
                pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
                pr_info("CPU#%d: status:     %016llx\n", cpu, status);
                pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 +              pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
        }
 -      pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 +      pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
  
 -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
                rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
  
                pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
                        cpu, idx, prev_left);
        }
 -      for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
  
                pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@@ -1091,7 -1095,7 +1092,7 @@@ static int x86_pmu_handle_irq(struct pt
  
        cpuc = &__get_cpu_var(cpu_hw_events);
  
 -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                if (!test_bit(idx, cpuc->active_mask))
                        continue;
  
                hwc = &event->hw;
  
                val = x86_perf_event_update(event);
 -              if (val & (1ULL << (x86_pmu.event_bits - 1)))
 +              if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
                        continue;
  
                /*
@@@ -1142,6 -1146,7 +1143,6 @@@ void set_perf_event_pending(void
  
  void perf_events_lapic_init(void)
  {
 -#ifdef CONFIG_X86_LOCAL_APIC
        if (!x86_pmu.apic || !x86_pmu_initialized())
                return;
  
         * Always use NMI for PMU
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 -#endif
  }
  
  static int __kprobes
@@@ -1172,7 -1178,9 +1173,7 @@@ perf_event_nmi_handler(struct notifier_
  
        regs = args->regs;
  
 -#ifdef CONFIG_X86_LOCAL_APIC
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 -#endif
        /*
         * Can't rely on the handled return value to say it was our NMI, two
         * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@@ -1266,15 -1274,12 +1267,15 @@@ int hw_perf_group_sched_in(struct perf_
        int assign[X86_PMC_IDX_MAX];
        int n0, n1, ret;
  
 +      if (!x86_pmu_initialized())
 +              return 0;
 +
        /* n0 = total number of events */
        n0 = collect_events(cpuc, leader, true);
        if (n0 < 0)
                return n0;
  
 -      ret = x86_schedule_events(cpuc, n0, assign);
 +      ret = x86_pmu.schedule_events(cpuc, n0, assign);
        if (ret)
                return ret;
  
@@@ -1324,9 -1329,6 +1325,9 @@@ undo
  
  #include "perf_event_amd.c"
  #include "perf_event_p6.c"
 +#include "perf_event_p4.c"
 +#include "perf_event_intel_lbr.c"
 +#include "perf_event_intel_ds.c"
  #include "perf_event_intel.c"
  
  static int __cpuinit
@@@ -1400,50 -1402,48 +1401,50 @@@ void __init init_hw_perf_events(void
  
        pr_cont("%s PMU driver.\n", x86_pmu.name);
  
 -      if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
 +      if (x86_pmu.quirks)
 +              x86_pmu.quirks();
 +
 +      if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
                WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
 -                   x86_pmu.num_events, X86_PMC_MAX_GENERIC);
 -              x86_pmu.num_events = X86_PMC_MAX_GENERIC;
 +                   x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
 +              x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
        }
 -      perf_event_mask = (1 << x86_pmu.num_events) - 1;
 -      perf_max_events = x86_pmu.num_events;
 +      x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 +      perf_max_events = x86_pmu.num_counters;
  
 -      if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
 +      if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
                WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
 -                   x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
 -              x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
 +                   x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
 +              x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
        }
  
 -      perf_event_mask |=
 -              ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
 -      x86_pmu.intel_ctrl = perf_event_mask;
 +      x86_pmu.intel_ctrl |=
 +              ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
  
        perf_events_lapic_init();
        register_die_notifier(&perf_event_nmi_notifier);
  
        unconstrained = (struct event_constraint)
 -              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
 -                                 0, x86_pmu.num_events);
 +              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 +                                 0, x86_pmu.num_counters);
  
        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
 -                      if (c->cmask != INTEL_ARCH_FIXED_MASK)
 +                      if (c->cmask != X86_RAW_EVENT_MASK)
                                continue;
  
 -                      c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
 -                      c->weight += x86_pmu.num_events;
 +                      c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
 +                      c->weight += x86_pmu.num_counters;
                }
        }
  
        pr_info("... version:                %d\n",     x86_pmu.version);
 -      pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
 -      pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
 -      pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
 +      pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 +      pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
 +      pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
        pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 -      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 -      pr_info("... event mask:             %016Lx\n", perf_event_mask);
 +      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 +      pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
  
        perf_cpu_notifier(x86_pmu_notifier);
  }
@@@ -1462,32 -1462,6 +1463,32 @@@ static const struct pmu pmu = 
        .unthrottle     = x86_pmu_unthrottle,
  };
  
 +/*
 + * validate that we can schedule this event
 + */
 +static int validate_event(struct perf_event *event)
 +{
 +      struct cpu_hw_events *fake_cpuc;
 +      struct event_constraint *c;
 +      int ret = 0;
 +
 +      fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
 +      if (!fake_cpuc)
 +              return -ENOMEM;
 +
 +      c = x86_pmu.get_event_constraints(fake_cpuc, event);
 +
 +      if (!c || !c->weight)
 +              ret = -ENOSPC;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(fake_cpuc, event);
 +
 +      kfree(fake_cpuc);
 +
 +      return ret;
 +}
 +
  /*
   * validate a single event group
   *
@@@ -1528,7 -1502,7 +1529,7 @@@ static int validate_group(struct perf_e
  
        fake_cpuc->n_events = n;
  
 -      ret = x86_schedule_events(fake_cpuc, n, NULL);
 +      ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
  
  out_free:
        kfree(fake_cpuc);
@@@ -1553,8 -1527,6 +1554,8 @@@ const struct pmu *hw_perf_event_init(st
  
                if (event->group_leader != event)
                        err = validate_group(event);
 +              else
 +                      err = validate_event(event);
  
                event->pmu = tmp;
        }
@@@ -1602,7 -1574,8 +1603,7 @@@ static void backtrace_address(void *dat
  {
        struct perf_callchain_entry *entry = data;
  
 -      if (reliable)
 -              callchain_store(entry, addr);
 +      callchain_store(entry, addr);
  }
  
  static const struct stacktrace_ops backtrace_ops = {
@@@ -1624,6 -1597,41 +1625,6 @@@ perf_callchain_kernel(struct pt_regs *r
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
  }
  
 -/*
 - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 - */
 -static unsigned long
 -copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 -{
 -      unsigned long offset, addr = (unsigned long)from;
 -      int type = in_nmi() ? KM_NMI : KM_IRQ0;
 -      unsigned long size, len = 0;
 -      struct page *page;
 -      void *map;
 -      int ret;
 -
 -      do {
 -              ret = __get_user_pages_fast(addr, 1, 0, &page);
 -              if (!ret)
 -                      break;
 -
 -              offset = addr & (PAGE_SIZE - 1);
 -              size = min(PAGE_SIZE - offset, n - len);
 -
 -              map = kmap_atomic(page, type);
 -              memcpy(to, map+offset, size);
 -              kunmap_atomic(map, type);
 -              put_page(page);
 -
 -              len  += size;
 -              to   += size;
 -              addr += size;
 -
 -      } while (len < n);
 -
 -      return len;
 -}
 -
  #ifdef CONFIG_COMPAT
  static inline int
  perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
diff --combined arch/x86/kernel/cpu/perf_event_intel.c
index f168b4030d406dbb7a84f8c873501b088c8dc823,9c794ac87837622a515094c34cd5b1eb070241b6..a099df96f9168000720480c96f0aaf5e1b3a80b3
@@@ -88,7 -88,7 +88,7 @@@ static u64 intel_pmu_event_map(int hw_e
        return intel_perfmon_event_map[hw_event];
  }
  
 -static __initconst u64 westmere_hw_cache_event_ids
 +static __initconst const u64 westmere_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
   },
  };
  
 -static __initconst u64 nehalem_hw_cache_event_ids
 +static __initconst const u64 nehalem_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
   },
  };
  
 -static __initconst u64 core2_hw_cache_event_ids
 +static __initconst const u64 core2_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
   },
  };
  
 -static __initconst u64 atom_hw_cache_event_ids
 +static __initconst const u64 atom_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
   },
  };
  
 -static u64 intel_pmu_raw_event(u64 hw_event)
 -{
 -#define CORE_EVNTSEL_EVENT_MASK               0x000000FFULL
 -#define CORE_EVNTSEL_UNIT_MASK                0x0000FF00ULL
 -#define CORE_EVNTSEL_EDGE_MASK                0x00040000ULL
 -#define CORE_EVNTSEL_INV_MASK         0x00800000ULL
 -#define CORE_EVNTSEL_REG_MASK         0xFF000000ULL
 -
 -#define CORE_EVNTSEL_MASK             \
 -      (INTEL_ARCH_EVTSEL_MASK |       \
 -       INTEL_ARCH_UNIT_MASK   |       \
 -       INTEL_ARCH_EDGE_MASK   |       \
 -       INTEL_ARCH_INV_MASK    |       \
 -       INTEL_ARCH_CNT_MASK)
 -
 -      return hw_event & CORE_EVNTSEL_MASK;
 -}
 -
 -static void intel_pmu_enable_bts(u64 config)
 -{
 -      unsigned long debugctlmsr;
 -
 -      debugctlmsr = get_debugctlmsr();
 -
 -      debugctlmsr |= X86_DEBUGCTL_TR;
 -      debugctlmsr |= X86_DEBUGCTL_BTS;
 -      debugctlmsr |= X86_DEBUGCTL_BTINT;
 -
 -      if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 -              debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
 -
 -      if (!(config & ARCH_PERFMON_EVENTSEL_USR))
 -              debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
 -
 -      update_debugctlmsr(debugctlmsr);
 -}
 -
 -static void intel_pmu_disable_bts(void)
 -{
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      unsigned long debugctlmsr;
 -
 -      if (!cpuc->ds)
 -              return;
 -
 -      debugctlmsr = get_debugctlmsr();
 -
 -      debugctlmsr &=
 -              ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
 -                X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
 -
 -      update_debugctlmsr(debugctlmsr);
 -}
 -
  static void intel_pmu_disable_all(void)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
        if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
                intel_pmu_disable_bts();
 +
 +      intel_pmu_pebs_disable_all();
 +      intel_pmu_lbr_disable_all();
  }
  
 -static void intel_pmu_enable_all(void)
 +static void intel_pmu_enable_all(int added)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
 +      intel_pmu_pebs_enable_all();
 +      intel_pmu_lbr_enable_all();
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
  
        if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
        }
  }
  
 +/*
 + * Workaround for:
 + *   Intel Errata AAK100 (model 26)
 + *   Intel Errata AAP53  (model 30)
 + *   Intel Errata BD53   (model 44)
 + *
 + * These chips need to be 'reset' when adding counters by programming
 + * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
 + * either in sequence on the same PMC or on different PMCs.
 + */
 +static void intel_pmu_nhm_enable_all(int added)
 +{
 +      if (added) {
 +              struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +              int i;
 +
 +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
 +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
 +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
 +
 +              wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
 +              wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
 +
 +              for (i = 0; i < 3; i++) {
 +                      struct perf_event *event = cpuc->events[i];
 +
 +                      if (!event)
 +                              continue;
 +
 +                      __x86_pmu_enable_event(&event->hw);
 +              }
 +      }
 +      intel_pmu_enable_all(added);
 +}
 +
  static inline u64 intel_pmu_get_status(void)
  {
        u64 status;
@@@ -533,7 -547,8 +533,7 @@@ static inline void intel_pmu_ack_status
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
  }
  
 -static inline void
 -intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 +static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
  {
        int idx = hwc->idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
  
        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
 -      (void)checking_wrmsrl(hwc->config_base, ctrl_val);
 -}
 -
 -static void intel_pmu_drain_bts_buffer(void)
 -{
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      struct debug_store *ds = cpuc->ds;
 -      struct bts_record {
 -              u64     from;
 -              u64     to;
 -              u64     flags;
 -      };
 -      struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
 -      struct bts_record *at, *top;
 -      struct perf_output_handle handle;
 -      struct perf_event_header header;
 -      struct perf_sample_data data;
 -      struct pt_regs regs;
 -
 -      if (!event)
 -              return;
 -
 -      if (!ds)
 -              return;
 -
 -      at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 -      top = (struct bts_record *)(unsigned long)ds->bts_index;
 -
 -      if (top <= at)
 -              return;
 -
 -      ds->bts_index = ds->bts_buffer_base;
 -
 -      perf_sample_data_init(&data, 0);
 -
 -      data.period     = event->hw.last_period;
 -      regs.ip         = 0;
 -
 -      /*
 -       * Prepare a generic sample, i.e. fill in the invariant fields.
 -       * We will overwrite the from and to address before we output
 -       * the sample.
 -       */
 -      perf_prepare_sample(&header, &data, event, &regs);
 -
 -      if (perf_output_begin(&handle, event,
 -                            header.size * (top - at), 1, 1))
 -              return;
 -
 -      for (; at < top; at++) {
 -              data.ip         = at->from;
 -              data.addr       = at->to;
 -
 -              perf_output_sample(&handle, &header, &data, event);
 -      }
 -
 -      perf_output_end(&handle);
 -
 -      /* There's new data available. */
 -      event->hw.interrupts++;
 -      event->pending_kill = POLL_IN;
 +      wrmsrl(hwc->config_base, ctrl_val);
  }
  
 -static inline void
 -intel_pmu_disable_event(struct perf_event *event)
 +static void intel_pmu_disable_event(struct perf_event *event)
  {
        struct hw_perf_event *hwc = &event->hw;
  
        }
  
        x86_pmu_disable_event(event);
 +
 +      if (unlikely(event->attr.precise))
 +              intel_pmu_pebs_disable(event);
  }
  
 -static inline void
 -intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 +static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
  {
        int idx = hwc->idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;
 -      int err;
  
        /*
         * Enable IRQ generation (0x8),
        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
 -      err = checking_wrmsrl(hwc->config_base, ctrl_val);
 +      wrmsrl(hwc->config_base, ctrl_val);
  }
  
  static void intel_pmu_enable_event(struct perf_event *event)
                return;
        }
  
 +      if (unlikely(event->attr.precise))
 +              intel_pmu_pebs_enable(event);
 +
        __x86_pmu_enable_event(hwc);
  }
  
@@@ -636,20 -708,20 +636,20 @@@ static void intel_pmu_reset(void
        unsigned long flags;
        int idx;
  
 -      if (!x86_pmu.num_events)
 +      if (!x86_pmu.num_counters)
                return;
  
        local_irq_save(flags);
  
        printk("clearing PMU state on CPU#%d\n", smp_processor_id());
  
 -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
                checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
        }
 -      for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
 +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
                checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
 -      }
 +
        if (ds)
                ds->bts_index = ds->bts_buffer_base;
  
@@@ -675,7 -747,7 +675,7 @@@ static int intel_pmu_handle_irq(struct 
        intel_pmu_drain_bts_buffer();
        status = intel_pmu_get_status();
        if (!status) {
 -              intel_pmu_enable_all();
 +              intel_pmu_enable_all(0);
                return 0;
        }
  
@@@ -690,15 -762,6 +690,15 @@@ again
  
        inc_irq_stat(apic_perf_irqs);
        ack = status;
 +
 +      intel_pmu_lbr_read();
 +
 +      /*
 +       * PEBS overflow sets bit 62 in the global status register
 +       */
 +      if (__test_and_clear_bit(62, (unsigned long *)&status))
 +              x86_pmu.drain_pebs(regs);
 +
        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];
  
                goto again;
  
  done:
 -      intel_pmu_enable_all();
 +      intel_pmu_enable_all(0);
        return 1;
  }
  
 -static struct event_constraint bts_constraint =
 -      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 -
  static struct event_constraint *
 -intel_special_constraints(struct perf_event *event)
 +intel_bts_constraints(struct perf_event *event)
  {
 -      unsigned int hw_event;
 -
 -      hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
 +      struct hw_perf_event *hwc = &event->hw;
 +      unsigned int hw_event, bts_event;
  
 -      if (unlikely((hw_event ==
 -                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 -                   (event->hw.sample_period == 1))) {
 +      hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 +      bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
  
 +      if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
                return &bts_constraint;
 -      }
 +
        return NULL;
  }
  
@@@ -748,53 -815,24 +748,53 @@@ intel_get_event_constraints(struct cpu_
  {
        struct event_constraint *c;
  
 -      c = intel_special_constraints(event);
 +      c = intel_bts_constraints(event);
 +      if (c)
 +              return c;
 +
 +      c = intel_pebs_constraints(event);
        if (c)
                return c;
  
        return x86_get_event_constraints(cpuc, event);
  }
  
 -static __initconst struct x86_pmu core_pmu = {
 +static int intel_pmu_hw_config(struct perf_event *event)
 +{
 +      int ret = x86_pmu_hw_config(event);
 +
 +      if (ret)
 +              return ret;
 +
 +      if (event->attr.type != PERF_TYPE_RAW)
 +              return 0;
 +
 +      if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
 +              return 0;
 +
 +      if (x86_pmu.version < 3)
 +              return -EINVAL;
 +
 +      if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 +              return -EACCES;
 +
 +      event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
 +
 +      return 0;
 +}
 +
 +static __initconst const struct x86_pmu core_pmu = {
        .name                   = "core",
        .handle_irq             = x86_pmu_handle_irq,
        .disable_all            = x86_pmu_disable_all,
        .enable_all             = x86_pmu_enable_all,
        .enable                 = x86_pmu_enable_event,
        .disable                = x86_pmu_disable_event,
 +      .hw_config              = x86_pmu_hw_config,
 +      .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = intel_pmu_event_map,
 -      .raw_event              = intel_pmu_raw_event,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
        .apic                   = 1,
        /*
        .event_constraints      = intel_core_event_constraints,
  };
  
 -static __initconst struct x86_pmu intel_pmu = {
 +static void intel_pmu_cpu_starting(int cpu)
 +{
 +      init_debug_store_on_cpu(cpu);
 +      /*
 +       * Deal with CPUs that don't clear their LBRs on power-up.
 +       */
 +      intel_pmu_lbr_reset();
 +}
 +
 +static void intel_pmu_cpu_dying(int cpu)
 +{
 +      fini_debug_store_on_cpu(cpu);
 +}
 +
 +static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
        .disable_all            = intel_pmu_disable_all,
        .enable_all             = intel_pmu_enable_all,
        .enable                 = intel_pmu_enable_event,
        .disable                = intel_pmu_disable_event,
 +      .hw_config              = intel_pmu_hw_config,
 +      .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = intel_pmu_event_map,
 -      .raw_event              = intel_pmu_raw_event,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
        .apic                   = 1,
        /*
         * the generic event period:
         */
        .max_period             = (1ULL << 31) - 1,
 -      .enable_bts             = intel_pmu_enable_bts,
 -      .disable_bts            = intel_pmu_disable_bts,
        .get_event_constraints  = intel_get_event_constraints,
  
 -      .cpu_starting           = init_debug_store_on_cpu,
 -      .cpu_dying              = fini_debug_store_on_cpu,
 +      .cpu_starting           = intel_pmu_cpu_starting,
 +      .cpu_dying              = intel_pmu_cpu_dying,
  };
  
 +static void intel_clovertown_quirks(void)
 +{
 +      /*
 +       * PEBS is unreliable due to:
 +       *
 +       *   AJ67  - PEBS may experience CPL leaks
 +       *   AJ68  - PEBS PMI may be delayed by one event
 +       *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
 +       *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
 +       *
 +       * AJ67 could be worked around by restricting the OS/USR flags.
 +       * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
 +       *
 +       * AJ106 could possibly be worked around by not allowing LBR
 +       *       usage from PEBS, including the fixup.
 +       * AJ68  could possibly be worked around by always programming
 +       *       a pebs_event_reset[0] value and coping with the lost events.
 +       *
 +       * But taken together it might just make sense to not enable PEBS on
 +       * these chips.
 +       */
 +      printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
 +      x86_pmu.pebs = 0;
 +      x86_pmu.pebs_constraints = NULL;
 +}
 +
  static __init int intel_pmu_init(void)
  {
        union cpuid10_edx edx;
        int version;
  
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 -              /* check for P6 processor family */
 -         if (boot_cpu_data.x86 == 6) {
 -              return p6_pmu_init();
 -         } else {
 +              switch (boot_cpu_data.x86) {
 +              case 0x6:
 +                      return p6_pmu_init();
 +              case 0xf:
 +                      return p4_pmu_init();
 +              }
                return -ENODEV;
 -         }
        }
  
        /*
                x86_pmu = intel_pmu;
  
        x86_pmu.version                 = version;
 -      x86_pmu.num_events              = eax.split.num_events;
 -      x86_pmu.event_bits              = eax.split.bit_width;
 -      x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
 +      x86_pmu.num_counters            = eax.split.num_counters;
 +      x86_pmu.cntval_bits             = eax.split.bit_width;
 +      x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
  
        /*
         * Quirk: v2 perfmon does not report fixed-purpose events, so
         * assume at least 3 events:
         */
        if (version > 1)
 -              x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
 +              x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 +
 +      /*
 +       * v2 and above have a perf capabilities MSR
 +       */
 +      if (version > 1) {
 +              u64 capabilities;
 +
 +              rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
 +              x86_pmu.intel_cap.capabilities = capabilities;
 +      }
 +
 +      intel_ds_init();
  
        /*
         * Install the hw-cache-events table:
                break;
  
        case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
 +              x86_pmu.quirks = intel_clovertown_quirks;
        case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
        case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
        case 29: /* six-core 45 nm xeon "Dunnington" */
                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              intel_pmu_lbr_init_core();
 +
                x86_pmu.event_constraints = intel_core2_event_constraints;
                pr_cont("Core2 events, ");
                break;
  
        case 26: /* 45 nm nehalem, "Bloomfield" */
        case 30: /* 45 nm nehalem, "Lynnfield" */
+       case 46: /* 45 nm nehalem-ex, "Beckton" */
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              intel_pmu_lbr_init_nhm();
 +
                x86_pmu.event_constraints = intel_nehalem_event_constraints;
 -              pr_cont("Nehalem/Corei7 events, ");
 +              x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 +              pr_cont("Nehalem events, ");
                break;
 +
        case 28: /* Atom */
                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              intel_pmu_lbr_init_atom();
 +
                x86_pmu.event_constraints = intel_gen_event_constraints;
                pr_cont("Atom events, ");
                break;
                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              intel_pmu_lbr_init_nhm();
 +
                x86_pmu.event_constraints = intel_westmere_event_constraints;
 +              x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                pr_cont("Westmere events, ");
                break;
  
diff --combined arch/x86/kernel/cpu/perf_event_intel_ds.c
index 2fea3622af7fde7264d4cdce883ef8c3aadd9bf7,0000000000000000000000000000000000000000..ec8b2e12e10452b2a67ffd88bb02067672c285e4
mode 100644,000000..100644
--- /dev/null
@@@ -1,664 -1,0 +1,664 @@@
-               for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
 +#ifdef CONFIG_CPU_SUP_INTEL
 +
 +/* The maximal number of PEBS events: */
 +#define MAX_PEBS_EVENTS               4
 +
 +/* The size of a BTS record in bytes: */
 +#define BTS_RECORD_SIZE               24
 +
 +#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
 +#define PEBS_BUFFER_SIZE      PAGE_SIZE
 +
 +/*
 + * pebs_record_32 for p4 and core not supported
 +
 +struct pebs_record_32 {
 +      u32 flags, ip;
 +      u32 ax, bc, cx, dx;
 +      u32 si, di, bp, sp;
 +};
 +
 + */
 +
 +struct pebs_record_core {
 +      u64 flags, ip;
 +      u64 ax, bx, cx, dx;
 +      u64 si, di, bp, sp;
 +      u64 r8,  r9,  r10, r11;
 +      u64 r12, r13, r14, r15;
 +};
 +
 +struct pebs_record_nhm {
 +      u64 flags, ip;
 +      u64 ax, bx, cx, dx;
 +      u64 si, di, bp, sp;
 +      u64 r8,  r9,  r10, r11;
 +      u64 r12, r13, r14, r15;
 +      u64 status, dla, dse, lat;
 +};
 +
 +/*
 + * A debug store configuration.
 + *
 + * We only support architectures that use 64bit fields.
 + */
 +struct debug_store {
 +      u64     bts_buffer_base;
 +      u64     bts_index;
 +      u64     bts_absolute_maximum;
 +      u64     bts_interrupt_threshold;
 +      u64     pebs_buffer_base;
 +      u64     pebs_index;
 +      u64     pebs_absolute_maximum;
 +      u64     pebs_interrupt_threshold;
 +      u64     pebs_event_reset[MAX_PEBS_EVENTS];
 +};
 +
 +static void init_debug_store_on_cpu(int cpu)
 +{
 +      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 +
 +      if (!ds)
 +              return;
 +
 +      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 +                   (u32)((u64)(unsigned long)ds),
 +                   (u32)((u64)(unsigned long)ds >> 32));
 +}
 +
 +static void fini_debug_store_on_cpu(int cpu)
 +{
 +      if (!per_cpu(cpu_hw_events, cpu).ds)
 +              return;
 +
 +      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 +}
 +
 +static void release_ds_buffers(void)
 +{
 +      int cpu;
 +
 +      if (!x86_pmu.bts && !x86_pmu.pebs)
 +              return;
 +
 +      get_online_cpus();
 +
 +      for_each_online_cpu(cpu)
 +              fini_debug_store_on_cpu(cpu);
 +
 +      for_each_possible_cpu(cpu) {
 +              struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 +
 +              if (!ds)
 +                      continue;
 +
 +              per_cpu(cpu_hw_events, cpu).ds = NULL;
 +
 +              kfree((void *)(unsigned long)ds->pebs_buffer_base);
 +              kfree((void *)(unsigned long)ds->bts_buffer_base);
 +              kfree(ds);
 +      }
 +
 +      put_online_cpus();
 +}
 +
 +static int reserve_ds_buffers(void)
 +{
 +      int cpu, err = 0;
 +
 +      if (!x86_pmu.bts && !x86_pmu.pebs)
 +              return 0;
 +
 +      get_online_cpus();
 +
 +      for_each_possible_cpu(cpu) {
 +              struct debug_store *ds;
 +              void *buffer;
 +              int max, thresh;
 +
 +              err = -ENOMEM;
 +              ds = kzalloc(sizeof(*ds), GFP_KERNEL);
 +              if (unlikely(!ds))
 +                      break;
 +              per_cpu(cpu_hw_events, cpu).ds = ds;
 +
 +              if (x86_pmu.bts) {
 +                      buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
 +                      if (unlikely(!buffer))
 +                              break;
 +
 +                      max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 +                      thresh = max / 16;
 +
 +                      ds->bts_buffer_base = (u64)(unsigned long)buffer;
 +                      ds->bts_index = ds->bts_buffer_base;
 +                      ds->bts_absolute_maximum = ds->bts_buffer_base +
 +                              max * BTS_RECORD_SIZE;
 +                      ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
 +                              thresh * BTS_RECORD_SIZE;
 +              }
 +
 +              if (x86_pmu.pebs) {
 +                      buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
 +                      if (unlikely(!buffer))
 +                              break;
 +
 +                      max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
 +
 +                      ds->pebs_buffer_base = (u64)(unsigned long)buffer;
 +                      ds->pebs_index = ds->pebs_buffer_base;
 +                      ds->pebs_absolute_maximum = ds->pebs_buffer_base +
 +                              max * x86_pmu.pebs_record_size;
 +                      /*
 +                       * Always use single record PEBS
 +                       */
 +                      ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
 +                              x86_pmu.pebs_record_size;
 +              }
 +
 +              err = 0;
 +      }
 +
 +      if (err)
 +              release_ds_buffers();
 +      else {
 +              for_each_online_cpu(cpu)
 +                      init_debug_store_on_cpu(cpu);
 +      }
 +
 +      put_online_cpus();
 +
 +      return err;
 +}
 +
 +/*
 + * BTS
 + */
 +
 +static struct event_constraint bts_constraint =
 +      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 +
 +static void intel_pmu_enable_bts(u64 config)
 +{
 +      unsigned long debugctlmsr;
 +
 +      debugctlmsr = get_debugctlmsr();
 +
 +      debugctlmsr |= DEBUGCTLMSR_TR;
 +      debugctlmsr |= DEBUGCTLMSR_BTS;
 +      debugctlmsr |= DEBUGCTLMSR_BTINT;
 +
 +      if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 +              debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
 +
 +      if (!(config & ARCH_PERFMON_EVENTSEL_USR))
 +              debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
 +
 +      update_debugctlmsr(debugctlmsr);
 +}
 +
 +static void intel_pmu_disable_bts(void)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      unsigned long debugctlmsr;
 +
 +      if (!cpuc->ds)
 +              return;
 +
 +      debugctlmsr = get_debugctlmsr();
 +
 +      debugctlmsr &=
 +              ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
 +                DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
 +
 +      update_debugctlmsr(debugctlmsr);
 +}
 +
 +static void intel_pmu_drain_bts_buffer(void)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct debug_store *ds = cpuc->ds;
 +      struct bts_record {
 +              u64     from;
 +              u64     to;
 +              u64     flags;
 +      };
 +      struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
 +      struct bts_record *at, *top;
 +      struct perf_output_handle handle;
 +      struct perf_event_header header;
 +      struct perf_sample_data data;
 +      struct pt_regs regs;
 +
 +      if (!event)
 +              return;
 +
 +      if (!ds)
 +              return;
 +
 +      at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 +      top = (struct bts_record *)(unsigned long)ds->bts_index;
 +
 +      if (top <= at)
 +              return;
 +
 +      ds->bts_index = ds->bts_buffer_base;
 +
 +      perf_sample_data_init(&data, 0);
 +      data.period = event->hw.last_period;
 +      regs.ip     = 0;
 +
 +      /*
 +       * Prepare a generic sample, i.e. fill in the invariant fields.
 +       * We will overwrite the from and to address before we output
 +       * the sample.
 +       */
 +      perf_prepare_sample(&header, &data, event, &regs);
 +
 +      if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
 +              return;
 +
 +      for (; at < top; at++) {
 +              data.ip         = at->from;
 +              data.addr       = at->to;
 +
 +              perf_output_sample(&handle, &header, &data, event);
 +      }
 +
 +      perf_output_end(&handle);
 +
 +      /* There's new data available. */
 +      event->hw.interrupts++;
 +      event->pending_kill = POLL_IN;
 +}
 +
 +/*
 + * PEBS
 + */
 +
 +static struct event_constraint intel_core_pebs_events[] = {
 +      PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
 +      PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
 +      EVENT_CONSTRAINT_END
 +};
 +
 +static struct event_constraint intel_nehalem_pebs_events[] = {
 +      PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
 +      PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
 +      PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
 +      PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
 +      EVENT_CONSTRAINT_END
 +};
 +
 +static struct event_constraint *
 +intel_pebs_constraints(struct perf_event *event)
 +{
 +      struct event_constraint *c;
 +
 +      if (!event->attr.precise)
 +              return NULL;
 +
 +      if (x86_pmu.pebs_constraints) {
 +              for_each_event_constraint(c, x86_pmu.pebs_constraints) {
 +                      if ((event->hw.config & c->cmask) == c->code)
 +                              return c;
 +              }
 +      }
 +
 +      return &emptyconstraint;
 +}
 +
 +static void intel_pmu_pebs_enable(struct perf_event *event)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 +
 +      cpuc->pebs_enabled |= 1ULL << hwc->idx;
 +      WARN_ON_ONCE(cpuc->enabled);
 +
 +      if (x86_pmu.intel_cap.pebs_trap)
 +              intel_pmu_lbr_enable(event);
 +}
 +
 +static void intel_pmu_pebs_disable(struct perf_event *event)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 +      if (cpuc->enabled)
 +              wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 +
 +      hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 +
 +      if (x86_pmu.intel_cap.pebs_trap)
 +              intel_pmu_lbr_disable(event);
 +}
 +
 +static void intel_pmu_pebs_enable_all(void)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +
 +      if (cpuc->pebs_enabled)
 +              wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 +}
 +
 +static void intel_pmu_pebs_disable_all(void)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +
 +      if (cpuc->pebs_enabled)
 +              wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 +}
 +
 +#include <asm/insn.h>
 +
 +static inline bool kernel_ip(unsigned long ip)
 +{
 +#ifdef CONFIG_X86_32
 +      return ip > PAGE_OFFSET;
 +#else
 +      return (long)ip < 0;
 +#endif
 +}
 +
 +static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      unsigned long from = cpuc->lbr_entries[0].from;
 +      unsigned long old_to, to = cpuc->lbr_entries[0].to;
 +      unsigned long ip = regs->ip;
 +
 +      /*
 +       * We don't need to fix up if the PEBS assist is fault-like
 +       */
 +      if (!x86_pmu.intel_cap.pebs_trap)
 +              return 1;
 +
 +      /*
 +       * No LBR entry, no basic block, no rewinding
 +       */
 +      if (!cpuc->lbr_stack.nr || !from || !to)
 +              return 0;
 +
 +      /*
 +       * Basic blocks should never cross user/kernel boundaries
 +       */
 +      if (kernel_ip(ip) != kernel_ip(to))
 +              return 0;
 +
 +      /*
 +       * unsigned math, either ip is before the start (impossible) or
 +       * the basic block is larger than 1 page (sanity)
 +       */
 +      if ((ip - to) > PAGE_SIZE)
 +              return 0;
 +
 +      /*
 +       * We sampled a branch insn, rewind using the LBR stack
 +       */
 +      if (ip == to) {
 +              regs->ip = from;
 +              return 1;
 +      }
 +
 +      do {
 +              struct insn insn;
 +              u8 buf[MAX_INSN_SIZE];
 +              void *kaddr;
 +
 +              old_to = to;
 +              if (!kernel_ip(ip)) {
 +                      int bytes, size = MAX_INSN_SIZE;
 +
 +                      bytes = copy_from_user_nmi(buf, (void __user *)to, size);
 +                      if (bytes != size)
 +                              return 0;
 +
 +                      kaddr = buf;
 +              } else
 +                      kaddr = (void *)to;
 +
 +              kernel_insn_init(&insn, kaddr);
 +              insn_get_length(&insn);
 +              to += insn.length;
 +      } while (to < ip);
 +
 +      if (to == ip) {
 +              regs->ip = old_to;
 +              return 1;
 +      }
 +
 +      /*
 +       * Even though we decoded the basic block, the instruction stream
 +       * never matched the given IP, either the TO or the IP got corrupted.
 +       */
 +      return 0;
 +}
 +
 +static int intel_pmu_save_and_restart(struct perf_event *event);
 +
 +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct debug_store *ds = cpuc->ds;
 +      struct perf_event *event = cpuc->events[0]; /* PMC0 only */
 +      struct pebs_record_core *at, *top;
 +      struct perf_sample_data data;
 +      struct perf_raw_record raw;
 +      struct pt_regs regs;
 +      int n;
 +
 +      if (!ds || !x86_pmu.pebs)
 +              return;
 +
 +      at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
 +      top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
 +
 +      /*
 +       * Whatever else happens, drain the thing
 +       */
 +      ds->pebs_index = ds->pebs_buffer_base;
 +
 +      if (!test_bit(0, cpuc->active_mask))
 +              return;
 +
 +      WARN_ON_ONCE(!event);
 +
 +      if (!event->attr.precise)
 +              return;
 +
 +      n = top - at;
 +      if (n <= 0)
 +              return;
 +
 +      if (!intel_pmu_save_and_restart(event))
 +              return;
 +
 +      /*
 +       * Should not happen, we program the threshold at 1 and do not
 +       * set a reset value.
 +       */
 +      WARN_ON_ONCE(n > 1);
 +      at += n - 1;
 +
 +      perf_sample_data_init(&data, 0);
 +      data.period = event->hw.last_period;
 +
 +      if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 +              raw.size = x86_pmu.pebs_record_size;
 +              raw.data = at;
 +              data.raw = &raw;
 +      }
 +
 +      /*
 +       * We use the interrupt regs as a base because the PEBS record
 +       * does not contain a full regs set, specifically it seems to
 +       * lack segment descriptors, which get used by things like
 +       * user_mode().
 +       *
 +       * In the simple case fix up only the IP and BP,SP regs, for
 +       * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
 +       * A possible PERF_SAMPLE_REGS will have to transfer all regs.
 +       */
 +      regs = *iregs;
 +      regs.ip = at->ip;
 +      regs.bp = at->bp;
 +      regs.sp = at->sp;
 +
 +      if (intel_pmu_pebs_fixup_ip(&regs))
 +              regs.flags |= PERF_EFLAGS_EXACT;
 +      else
 +              regs.flags &= ~PERF_EFLAGS_EXACT;
 +
 +      if (perf_event_overflow(event, 1, &data, &regs))
 +              x86_pmu_stop(event);
 +}
 +
 +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct debug_store *ds = cpuc->ds;
 +      struct pebs_record_nhm *at, *top;
 +      struct perf_sample_data data;
 +      struct perf_event *event = NULL;
 +      struct perf_raw_record raw;
 +      struct pt_regs regs;
 +      u64 status = 0;
 +      int bit, n;
 +
 +      if (!ds || !x86_pmu.pebs)
 +              return;
 +
 +      at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 +      top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 +
 +      ds->pebs_index = ds->pebs_buffer_base;
 +
 +      n = top - at;
 +      if (n <= 0)
 +              return;
 +
 +      /*
 +       * Should not happen, we program the threshold at 1 and do not
 +       * set a reset value.
 +       */
 +      WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
 +
 +      for ( ; at < top; at++) {
++              for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
 +                      event = cpuc->events[bit];
 +                      if (!test_bit(bit, cpuc->active_mask))
 +                              continue;
 +
 +                      WARN_ON_ONCE(!event);
 +
 +                      if (!event->attr.precise)
 +                              continue;
 +
 +                      if (__test_and_set_bit(bit, (unsigned long *)&status))
 +                              continue;
 +
 +                      break;
 +              }
 +
 +              if (!event || bit >= MAX_PEBS_EVENTS)
 +                      continue;
 +
 +              if (!intel_pmu_save_and_restart(event))
 +                      continue;
 +
 +              perf_sample_data_init(&data, 0);
 +              data.period = event->hw.last_period;
 +
 +              if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 +                      raw.size = x86_pmu.pebs_record_size;
 +                      raw.data = at;
 +                      data.raw = &raw;
 +              }
 +
 +              /*
 +               * See the comment in intel_pmu_drain_pebs_core()
 +               */
 +              regs = *iregs;
 +              regs.ip = at->ip;
 +              regs.bp = at->bp;
 +              regs.sp = at->sp;
 +
 +              if (intel_pmu_pebs_fixup_ip(&regs))
 +                      regs.flags |= PERF_EFLAGS_EXACT;
 +              else
 +                      regs.flags &= ~PERF_EFLAGS_EXACT;
 +
 +              if (perf_event_overflow(event, 1, &data, &regs))
 +                      x86_pmu_stop(event);
 +      }
 +}
 +
 +/*
 + * BTS, PEBS probe and setup
 + */
 +
 +static void intel_ds_init(void)
 +{
 +      /*
 +       * No support for 32bit formats
 +       */
 +      if (!boot_cpu_has(X86_FEATURE_DTES64))
 +              return;
 +
 +      x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
 +      x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
 +      if (x86_pmu.pebs) {
 +              char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
 +              int format = x86_pmu.intel_cap.pebs_format;
 +
 +              switch (format) {
 +              case 0:
 +                      printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
 +                      x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
 +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
 +                      x86_pmu.pebs_constraints = intel_core_pebs_events;
 +                      break;
 +
 +              case 1:
 +                      printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
 +                      x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
 +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 +                      x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
 +                      break;
 +
 +              default:
 +                      printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 +                      x86_pmu.pebs = 0;
 +                      break;
 +              }
 +      }
 +}
 +
 +#else /* CONFIG_CPU_SUP_INTEL */
 +
 +static int reserve_ds_buffers(void)
 +{
 +      return 0;
 +}
 +
 +static void release_ds_buffers(void)
 +{
 +}
 +
 +#endif /* CONFIG_CPU_SUP_INTEL */
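
The buffer arithmetic in reserve_ds_buffers() above is easier to follow in isolation. The stand-alone sketch below is not taken from the commit; it assumes 4 KiB pages and the Nehalem record format (22 u64 fields, 176 bytes) and simply recomputes the values the kernel programs into struct debug_store: BTS is set to interrupt when the buffer is roughly 15/16 full, while PEBS is set to interrupt after a single record.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE        4096                    /* assumption: 4 KiB pages */
#define BTS_BUFFER_SIZE  (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE PAGE_SIZE
#define BTS_RECORD_SIZE  24

int main(void)
{
	/* stand-ins for the kzalloc()'ed buffer addresses */
	uint64_t bts_base = 0x100000, pebs_base = 0x200000;
	unsigned int pebs_record_size = 176;     /* sizeof(struct pebs_record_nhm) */

	unsigned int bts_max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	unsigned int bts_thresh = bts_max / 16;
	uint64_t bts_abs_max = bts_base + (uint64_t)bts_max * BTS_RECORD_SIZE;
	uint64_t bts_irq_thr = bts_abs_max - (uint64_t)bts_thresh * BTS_RECORD_SIZE;

	unsigned int pebs_max = PEBS_BUFFER_SIZE / pebs_record_size;
	uint64_t pebs_abs_max = pebs_base + (uint64_t)pebs_max * pebs_record_size;
	uint64_t pebs_irq_thr = pebs_base + pebs_record_size;  /* single-record PEBS */

	printf("BTS : %u records, irq %u records before the end (0x%llx of 0x%llx)\n",
	       bts_max, bts_thresh,
	       (unsigned long long)bts_irq_thr, (unsigned long long)bts_abs_max);
	printf("PEBS: %u records fit, irq at 0x%llx (abs max 0x%llx)\n",
	       pebs_max, (unsigned long long)pebs_irq_thr,
	       (unsigned long long)pebs_abs_max);
	return 0;
}

Under those assumptions the BTS buffer holds 2730 records with the interrupt threshold 170 records before the end, and the PEBS buffer holds 23 records, of which only the first is ever used before the threshold fires.
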
diff --combined arch/x86/kernel/ptrace.c
index f2fd3b80e565f555a14df09230d2a401b8ca8bed,2e9b55027b7e10e5a8ee31dc270ed7497439384c..055be0afd3305788c6fa529eb17cdfa1958a0a80
@@@ -2,6 -2,9 +2,6 @@@
  /*
   * Pentium III FXSR, SSE support
   *    Gareth Hughes <[email protected]>, May 2000
 - *
 - * BTS tracing
 - *    Markus Metzger <[email protected]>, Dec 2007
   */
  
  #include <linux/kernel.h>
@@@ -9,6 -12,7 +9,7 @@@
  #include <linux/mm.h>
  #include <linux/smp.h>
  #include <linux/errno.h>
+ #include <linux/slab.h>
  #include <linux/ptrace.h>
  #include <linux/regset.h>
  #include <linux/tracehook.h>
@@@ -18,6 -22,7 +19,6 @@@
  #include <linux/audit.h>
  #include <linux/seccomp.h>
  #include <linux/signal.h>
 -#include <linux/workqueue.h>
  #include <linux/perf_event.h>
  #include <linux/hw_breakpoint.h>
  
@@@ -31,6 -36,7 +32,6 @@@
  #include <asm/desc.h>
  #include <asm/prctl.h>
  #include <asm/proto.h>
 -#include <asm/ds.h>
  #include <asm/hw_breakpoint.h>
  
  #include "tls.h"
@@@ -783,6 -789,342 +784,6 @@@ static int ioperm_get(struct task_struc
                                   0, IO_BITMAP_BYTES);
  }
  
 -#ifdef CONFIG_X86_PTRACE_BTS
 -/*
 - * A branch trace store context.
 - *
 - * Contexts may only be installed by ptrace_bts_config() and only for
 - * ptraced tasks.
 - *
 - * Contexts are destroyed when the tracee is detached from the tracer.
 - * The actual destruction work requires interrupts enabled, so the
 - * work is deferred and will be scheduled during __ptrace_unlink().
 - *
 - * Contexts hold an additional task_struct reference on the traced
 - * task, as well as a reference on the tracer's mm.
 - *
 - * Ptrace already holds a task_struct for the duration of ptrace operations,
 - * but since destruction is deferred, it may be executed after both
 - * tracer and tracee exited.
 - */
 -struct bts_context {
 -      /* The branch trace handle. */
 -      struct bts_tracer       *tracer;
 -
 -      /* The buffer used to store the branch trace and its size. */
 -      void                    *buffer;
 -      unsigned int            size;
 -
 -      /* The mm that paid for the above buffer. */
 -      struct mm_struct        *mm;
 -
 -      /* The task this context belongs to. */
 -      struct task_struct      *task;
 -
 -      /* The signal to send on a bts buffer overflow. */
 -      unsigned int            bts_ovfl_signal;
 -
 -      /* The work struct to destroy a context. */
 -      struct work_struct      work;
 -};
 -
 -static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
 -{
 -      void *buffer = NULL;
 -      int err = -ENOMEM;
 -
 -      err = account_locked_memory(current->mm, current->signal->rlim, size);
 -      if (err < 0)
 -              return err;
 -
 -      buffer = kzalloc(size, GFP_KERNEL);
 -      if (!buffer)
 -              goto out_refund;
 -
 -      context->buffer = buffer;
 -      context->size = size;
 -      context->mm = get_task_mm(current);
 -
 -      return 0;
 -
 - out_refund:
 -      refund_locked_memory(current->mm, size);
 -      return err;
 -}
 -
 -static inline void free_bts_buffer(struct bts_context *context)
 -{
 -      if (!context->buffer)
 -              return;
 -
 -      kfree(context->buffer);
 -      context->buffer = NULL;
 -
 -      refund_locked_memory(context->mm, context->size);
 -      context->size = 0;
 -
 -      mmput(context->mm);
 -      context->mm = NULL;
 -}
 -
 -static void free_bts_context_work(struct work_struct *w)
 -{
 -      struct bts_context *context;
 -
 -      context = container_of(w, struct bts_context, work);
 -
 -      ds_release_bts(context->tracer);
 -      put_task_struct(context->task);
 -      free_bts_buffer(context);
 -      kfree(context);
 -}
 -
 -static inline void free_bts_context(struct bts_context *context)
 -{
 -      INIT_WORK(&context->work, free_bts_context_work);
 -      schedule_work(&context->work);
 -}
 -
 -static inline struct bts_context *alloc_bts_context(struct task_struct *task)
 -{
 -      struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
 -      if (context) {
 -              context->task = task;
 -              task->bts = context;
 -
 -              get_task_struct(task);
 -      }
 -
 -      return context;
 -}
 -
 -static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 -                                struct bts_struct __user *out)
 -{
 -      struct bts_context *context;
 -      const struct bts_trace *trace;
 -      struct bts_struct bts;
 -      const unsigned char *at;
 -      int error;
 -
 -      context = child->bts;
 -      if (!context)
 -              return -ESRCH;
 -
 -      trace = ds_read_bts(context->tracer);
 -      if (!trace)
 -              return -ESRCH;
 -
 -      at = trace->ds.top - ((index + 1) * trace->ds.size);
 -      if ((void *)at < trace->ds.begin)
 -              at += (trace->ds.n * trace->ds.size);
 -
 -      if (!trace->read)
 -              return -EOPNOTSUPP;
 -
 -      error = trace->read(context->tracer, at, &bts);
 -      if (error < 0)
 -              return error;
 -
 -      if (copy_to_user(out, &bts, sizeof(bts)))
 -              return -EFAULT;
 -
 -      return sizeof(bts);
 -}
 -
 -static int ptrace_bts_drain(struct task_struct *child,
 -                          long size,
 -                          struct bts_struct __user *out)
 -{
 -      struct bts_context *context;
 -      const struct bts_trace *trace;
 -      const unsigned char *at;
 -      int error, drained = 0;
 -
 -      context = child->bts;
 -      if (!context)
 -              return -ESRCH;
 -
 -      trace = ds_read_bts(context->tracer);
 -      if (!trace)
 -              return -ESRCH;
 -
 -      if (!trace->read)
 -              return -EOPNOTSUPP;
 -
 -      if (size < (trace->ds.top - trace->ds.begin))
 -              return -EIO;
 -
 -      for (at = trace->ds.begin; (void *)at < trace->ds.top;
 -           out++, drained++, at += trace->ds.size) {
 -              struct bts_struct bts;
 -
 -              error = trace->read(context->tracer, at, &bts);
 -              if (error < 0)
 -                      return error;
 -
 -              if (copy_to_user(out, &bts, sizeof(bts)))
 -                      return -EFAULT;
 -      }
 -
 -      memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 -
 -      error = ds_reset_bts(context->tracer);
 -      if (error < 0)
 -              return error;
 -
 -      return drained;
 -}
 -
 -static int ptrace_bts_config(struct task_struct *child,
 -                           long cfg_size,
 -                           const struct ptrace_bts_config __user *ucfg)
 -{
 -      struct bts_context *context;
 -      struct ptrace_bts_config cfg;
 -      unsigned int flags = 0;
 -
 -      if (cfg_size < sizeof(cfg))
 -              return -EIO;
 -
 -      if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 -              return -EFAULT;
 -
 -      context = child->bts;
 -      if (!context)
 -              context = alloc_bts_context(child);
 -      if (!context)
 -              return -ENOMEM;
 -
 -      if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 -              if (!cfg.signal)
 -                      return -EINVAL;
 -
 -              return -EOPNOTSUPP;
 -              context->bts_ovfl_signal = cfg.signal;
 -      }
 -
 -      ds_release_bts(context->tracer);
 -      context->tracer = NULL;
 -
 -      if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
 -              int err;
 -
 -              free_bts_buffer(context);
 -              if (!cfg.size)
 -                      return 0;
 -
 -              err = alloc_bts_buffer(context, cfg.size);
 -              if (err < 0)
 -                      return err;
 -      }
 -
 -      if (cfg.flags & PTRACE_BTS_O_TRACE)
 -              flags |= BTS_USER;
 -
 -      if (cfg.flags & PTRACE_BTS_O_SCHED)
 -              flags |= BTS_TIMESTAMPS;
 -
 -      context->tracer =
 -              ds_request_bts_task(child, context->buffer, context->size,
 -                                  NULL, (size_t)-1, flags);
 -      if (unlikely(IS_ERR(context->tracer))) {
 -              int error = PTR_ERR(context->tracer);
 -
 -              free_bts_buffer(context);
 -              context->tracer = NULL;
 -              return error;
 -      }
 -
 -      return sizeof(cfg);
 -}
 -
 -static int ptrace_bts_status(struct task_struct *child,
 -                           long cfg_size,
 -                           struct ptrace_bts_config __user *ucfg)
 -{
 -      struct bts_context *context;
 -      const struct bts_trace *trace;
 -      struct ptrace_bts_config cfg;
 -
 -      context = child->bts;
 -      if (!context)
 -              return -ESRCH;
 -
 -      if (cfg_size < sizeof(cfg))
 -              return -EIO;
 -
 -      trace = ds_read_bts(context->tracer);
 -      if (!trace)
 -              return -ESRCH;
 -
 -      memset(&cfg, 0, sizeof(cfg));
 -      cfg.size        = trace->ds.end - trace->ds.begin;
 -      cfg.signal      = context->bts_ovfl_signal;
 -      cfg.bts_size    = sizeof(struct bts_struct);
 -
 -      if (cfg.signal)
 -              cfg.flags |= PTRACE_BTS_O_SIGNAL;
 -
 -      if (trace->ds.flags & BTS_USER)
 -              cfg.flags |= PTRACE_BTS_O_TRACE;
 -
 -      if (trace->ds.flags & BTS_TIMESTAMPS)
 -              cfg.flags |= PTRACE_BTS_O_SCHED;
 -
 -      if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
 -              return -EFAULT;
 -
 -      return sizeof(cfg);
 -}
 -
 -static int ptrace_bts_clear(struct task_struct *child)
 -{
 -      struct bts_context *context;
 -      const struct bts_trace *trace;
 -
 -      context = child->bts;
 -      if (!context)
 -              return -ESRCH;
 -
 -      trace = ds_read_bts(context->tracer);
 -      if (!trace)
 -              return -ESRCH;
 -
 -      memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 -
 -      return ds_reset_bts(context->tracer);
 -}
 -
 -static int ptrace_bts_size(struct task_struct *child)
 -{
 -      struct bts_context *context;
 -      const struct bts_trace *trace;
 -
 -      context = child->bts;
 -      if (!context)
 -              return -ESRCH;
 -
 -      trace = ds_read_bts(context->tracer);
 -      if (!trace)
 -              return -ESRCH;
 -
 -      return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 -}
 -
 -/*
 - * Called from __ptrace_unlink() after the child has been moved back
 - * to its original parent.
 - */
 -void ptrace_bts_untrace(struct task_struct *child)
 -{
 -      if (unlikely(child->bts)) {
 -              free_bts_context(child->bts);
 -              child->bts = NULL;
 -      }
 -}
 -#endif /* CONFIG_X86_PTRACE_BTS */
 -
  /*
   * Called by kernel/ptrace.c when detaching..
   *
@@@ -910,6 -1252,39 +911,6 @@@ long arch_ptrace(struct task_struct *ch
                break;
  #endif
  
 -      /*
 -       * These bits need more cooking - not enabled yet:
 -       */
 -#ifdef CONFIG_X86_PTRACE_BTS
 -      case PTRACE_BTS_CONFIG:
 -              ret = ptrace_bts_config
 -                      (child, data, (struct ptrace_bts_config __user *)addr);
 -              break;
 -
 -      case PTRACE_BTS_STATUS:
 -              ret = ptrace_bts_status
 -                      (child, data, (struct ptrace_bts_config __user *)addr);
 -              break;
 -
 -      case PTRACE_BTS_SIZE:
 -              ret = ptrace_bts_size(child);
 -              break;
 -
 -      case PTRACE_BTS_GET:
 -              ret = ptrace_bts_read_record
 -                      (child, data, (struct bts_struct __user *) addr);
 -              break;
 -
 -      case PTRACE_BTS_CLEAR:
 -              ret = ptrace_bts_clear(child);
 -              break;
 -
 -      case PTRACE_BTS_DRAIN:
 -              ret = ptrace_bts_drain
 -                      (child, data, (struct bts_struct __user *) addr);
 -              break;
 -#endif /* CONFIG_X86_PTRACE_BTS */
 -
        default:
                ret = ptrace_request(child, request, addr, data);
                break;
@@@ -1169,6 -1544,14 +1170,6 @@@ long compat_arch_ptrace(struct task_str
  
        case PTRACE_GET_THREAD_AREA:
        case PTRACE_SET_THREAD_AREA:
 -#ifdef CONFIG_X86_PTRACE_BTS
 -      case PTRACE_BTS_CONFIG:
 -      case PTRACE_BTS_STATUS:
 -      case PTRACE_BTS_SIZE:
 -      case PTRACE_BTS_GET:
 -      case PTRACE_BTS_CLEAR:
 -      case PTRACE_BTS_DRAIN:
 -#endif /* CONFIG_X86_PTRACE_BTS */
                return arch_ptrace(child, request, addr, data);
  
        default:
diff --combined include/linux/mm.h
index c8442b6551114ec9bce7037ff0487dc305404096,462acaf36f3a3959cca19e2f3c2e66cc01fe5379..fb19bb92b809d81564cf03af26f5a568f2994629
@@@ -19,6 -19,7 +19,6 @@@ struct anon_vma
  struct file_ra_state;
  struct user_struct;
  struct writeback_control;
 -struct rlimit;
  
  #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
  extern unsigned long max_mapnr;
@@@ -782,8 -783,8 +782,8 @@@ struct mm_walk 
        int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
        int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
        int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
-       int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long,
-                            struct mm_walk *);
+       int (*hugetlb_entry)(pte_t *, unsigned long,
+                            unsigned long, unsigned long, struct mm_walk *);
        struct mm_struct *mm;
        void *private;
  };
@@@ -1448,6 -1449,9 +1448,6 @@@ int vmemmap_populate_basepages(struct p
  int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
  void vmemmap_populate_print_last(void);
  
 -extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
 -                               size_t size);
 -extern void refund_locked_memory(struct mm_struct *mm, size_t size);
  
  enum mf_flags {
        MF_COUNT_INCREASED = 1 << 0,
diff --combined kernel/fork.c
index d67f1dbfbe03fe94a05bb97798dc702cf9edee36,44b0791b0a2e378d1ad20e390cbfbec981e7fad8..5d3592deaf71cab3a6dc47acaa38317738e15fee
@@@ -1052,6 -1052,9 +1052,9 @@@ static struct task_struct *copy_process
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
  #endif
+ #if defined(SPLIT_RSS_COUNTING)
+       memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+ #endif
  
        p->default_timer_slack_ns = current->timer_slack_ns;
  
        p->memcg_batch.do_batch = 0;
        p->memcg_batch.memcg = NULL;
  #endif
 -
 -      p->bts = NULL;
 -
        p->stack_start = stack_start;
  
        /* Perform scheduler related setup. Assign this task to a CPU. */
diff --combined kernel/perf_event.c
index 4aa50ff4efc04829c34215283f9a5134925d6c9e,2f3fbf84215a940cc40eccef9c8304964d10906f..fcf42dcd6089b119732601a037b786e909f2c866
@@@ -15,6 -15,7 +15,7 @@@
  #include <linux/smp.h>
  #include <linux/file.h>
  #include <linux/poll.h>
+ #include <linux/slab.h>
  #include <linux/sysfs.h>
  #include <linux/dcache.h>
  #include <linux/percpu.h>
@@@ -1366,8 -1367,6 +1367,8 @@@ void perf_event_task_sched_in(struct ta
        if (cpuctx->task_ctx == ctx)
                return;
  
 +      perf_disable();
 +
        /*
         * We want to keep the following priority order:
         * cpu pinned (that don't need to move), task pinned,
        ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
  
        cpuctx->task_ctx = ctx;
 +
 +      perf_enable();
  }
  
  #define MAX_INTERRUPTS (~0ULL)
@@@ -2645,7 -2642,6 +2646,7 @@@ static int perf_fasync(int fd, struct f
  }
  
  static const struct file_operations perf_fops = {
 +      .llseek                 = no_llseek,
        .release                = perf_release,
        .read                   = perf_read,
        .poll                   = perf_poll,
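
The kernel/perf_event.c hunk above brackets the task sched-in path with a perf_disable()/perf_enable() pair. A rough sketch of why such a pair can nest and still only touch the hardware once, at the outermost enable; batch_begin(), batch_end(), queue_change() and flush_hw() are made-up names for illustration, not the kernel implementation:

#include <stdio.h>

static int disable_depth;
static int hw_dirty;

static void flush_hw(void)     { printf("reprogram hardware once\n"); hw_dirty = 0; }
static void batch_begin(void)  { disable_depth++; }
static void queue_change(void) { hw_dirty = 1; /* defer the register writes */ }

static void batch_end(void)
{
	if (--disable_depth == 0 && hw_dirty)
		flush_hw();     /* only the outermost end applies the changes */
}

int main(void)
{
	batch_begin();          /* e.g. around scheduling a task's events in */
	queue_change();
	batch_begin();          /* a nested section changes more state */
	queue_change();
	batch_end();            /* inner end: nothing hits the hardware yet */
	batch_end();            /* outer end: one combined reprogram */
	return 0;
}
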
diff --combined kernel/sched.c
index 1038ca163890eb4625c6751b7cbd611d521af3b6,a3dff1f3f9b0c32070d3ade21f3d2da63708d099..8cafe3ff558fec69a0c3c4676aa61191bb6e84da
@@@ -71,6 -71,7 +71,7 @@@
  #include <linux/debugfs.h>
  #include <linux/ctype.h>
  #include <linux/ftrace.h>
+ #include <linux/slab.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@@ -2076,6 -2077,49 +2077,6 @@@ migrate_task(struct task_struct *p, in
        return 1;
  }
  
 -/*
 - * wait_task_context_switch - wait for a thread to complete at least one
 - *                            context switch.
 - *
 - * @p must not be current.
 - */
 -void wait_task_context_switch(struct task_struct *p)
 -{
 -      unsigned long nvcsw, nivcsw, flags;
 -      int running;
 -      struct rq *rq;
 -
 -      nvcsw   = p->nvcsw;
 -      nivcsw  = p->nivcsw;
 -      for (;;) {
 -              /*
 -               * The runqueue is assigned before the actual context
 -               * switch. We need to take the runqueue lock.
 -               *
 -               * We could check initially without the lock but it is
 -               * very likely that we need to take the lock in every
 -               * iteration.
 -               */
 -              rq = task_rq_lock(p, &flags);
 -              running = task_running(rq, p);
 -              task_rq_unlock(rq, &flags);
 -
 -              if (likely(!running))
 -                      break;
 -              /*
 -               * The switch count is incremented before the actual
 -               * context switch. We thus wait for two switches to be
 -               * sure at least one completed.
 -               */
 -              if ((p->nvcsw - nvcsw) > 1)
 -                      break;
 -              if ((p->nivcsw - nivcsw) > 1)
 -                      break;
 -
 -              cpu_relax();
 -      }
 -}
 -
  /*
   * wait_task_inactive - wait for a thread to unschedule.
   *
@@@ -5344,7 -5388,7 +5345,7 @@@ int set_cpus_allowed_ptr(struct task_st
  
                get_task_struct(mt);
                task_rq_unlock(rq, &flags);
-               wake_up_process(rq->migration_thread);
+               wake_up_process(mt);
                put_task_struct(mt);
                wait_for_completion(&req.done);
                tlb_migrate_finish(p->mm);
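
In the kernel/sched.c hunk above, wake_up_process() now takes the locally saved, reference-counted mt instead of re-reading rq->migration_thread after the runqueue lock has been dropped. A rough stand-alone sketch of that pin-then-use pattern; struct worker, get_worker(), put_worker() and poke_worker() are invented for illustration and are not kernel API:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct worker { int refcount; const char *name; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct worker *shared_worker;     /* another thread may clear this */

static void get_worker(struct worker *w) { __sync_fetch_and_add(&w->refcount, 1); }

static void put_worker(struct worker *w)
{
	if (__sync_sub_and_fetch(&w->refcount, 1) == 0)
		free(w);
}

static void poke_worker(void)
{
	struct worker *w;

	pthread_mutex_lock(&lock);
	w = shared_worker;
	if (w)
		get_worker(w);           /* pin it while the lock is still held */
	pthread_mutex_unlock(&lock);

	if (!w)
		return;
	printf("waking %s\n", w->name);  /* the kernel would wake_up_process(w) here */
	put_worker(w);
}

int main(void)
{
	struct worker *w = calloc(1, sizeof(*w));

	w->refcount = 1;
	w->name = "migration/0";
	shared_worker = w;

	poke_worker();

	shared_worker = NULL;            /* simulate the thread going away */
	put_worker(w);
	return 0;
}

Without the pin, the shared pointer could be cleared and its target freed between the unlock and the wake.
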
diff --combined kernel/trace/trace_selftest.c
index a7084e7c04270ad0116d6ef54e7184005fa0ab3f,81003b4d617fad5a966c2ed89fba1e5b47ddffdd..1cc9858258b33468627f0c9c787d8401480a152e
@@@ -3,6 -3,7 +3,7 @@@
  #include <linux/stringify.h>
  #include <linux/kthread.h>
  #include <linux/delay.h>
+ #include <linux/slab.h>
  
  static inline int trace_valid_entry(struct trace_entry *entry)
  {
@@@ -16,6 -17,7 +17,6 @@@
        case TRACE_BRANCH:
        case TRACE_GRAPH_ENT:
        case TRACE_GRAPH_RET:
 -      case TRACE_HW_BRANCHES:
        case TRACE_KSYM:
                return 1;
        }
@@@ -753,6 -755,62 +754,6 @@@ trace_selftest_startup_branch(struct tr
  }
  #endif /* CONFIG_BRANCH_TRACER */
  
 -#ifdef CONFIG_HW_BRANCH_TRACER
 -int
 -trace_selftest_startup_hw_branches(struct tracer *trace,
 -                                 struct trace_array *tr)
 -{
 -      struct trace_iterator *iter;
 -      struct tracer tracer;
 -      unsigned long count;
 -      int ret;
 -
 -      if (!trace->open) {
 -              printk(KERN_CONT "missing open function...");
 -              return -1;
 -      }
 -
 -      ret = tracer_init(trace, tr);
 -      if (ret) {
 -              warn_failed_init_tracer(trace, ret);
 -              return ret;
 -      }
 -
 -      /*
 -       * The hw-branch tracer needs to collect the trace from the various
 -       * cpu trace buffers - before tracing is stopped.
 -       */
 -      iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 -      if (!iter)
 -              return -ENOMEM;
 -
 -      memcpy(&tracer, trace, sizeof(tracer));
 -
 -      iter->trace = &tracer;
 -      iter->tr = tr;
 -      iter->pos = -1;
 -      mutex_init(&iter->mutex);
 -
 -      trace->open(iter);
 -
 -      mutex_destroy(&iter->mutex);
 -      kfree(iter);
 -
 -      tracing_stop();
 -
 -      ret = trace_test_buffer(tr, &count);
 -      trace->reset(tr);
 -      tracing_start();
 -
 -      if (!ret && !count) {
 -              printk(KERN_CONT "no entries found..");
 -              ret = -1;
 -      }
 -
 -      return ret;
 -}
 -#endif /* CONFIG_HW_BRANCH_TRACER */
 -
  #ifdef CONFIG_KSYM_TRACER
  static int ksym_selftest_dummy;
  