Merge branch 'linus' into perf/core

author Ingo Molnar <[email protected]>

Thu, 8 Apr 2010 11:36:36 +0000 (13:36 +0200)

committer Ingo Molnar <[email protected]>

Thu, 8 Apr 2010 11:37:18 +0000 (13:37 +0200)
author Ingo Molnar <[email protected]>
Thu, 8 Apr 2010 11:36:36 +0000 (13:36 +0200)
committer Ingo Molnar <[email protected]>
Thu, 8 Apr 2010 11:37:18 +0000 (13:37 +0200)
diff --combined MAINTAINERS

index 6c858e89c7d01c2fc5849366b49ed4c38d058075,7a9ccda2a3070183af6f19e361181ed2bc1bade2..c3e9c3633b75ca89bee21c126339be1053717770
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -2474,12 -2474,6 +2474,6 @@@ L:     [email protected]
   S:    Odd Fixes
   F:    drivers/char/hvc_*
   
- VIRTIO CONSOLE DRIVER
- M:    Amit Shah <[email protected]>
- L:    [email protected]
- S:    Maintained
- F:    drivers/char/virtio_console.c
- 
   iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER
   M:    Peter Jones <[email protected]>
   M:    Konrad Rzeszutek Wilk <[email protected]>
@@@ -4349,13 -4343,13 +4343,13 @@@ M:   Paul Mackerras <[email protected]
   M:    Ingo Molnar <[email protected]>
   M:    Arnaldo Carvalho de Melo <[email protected]>
   S:    Supported
- -F:    kernel/perf_event.c
+ +F:    kernel/perf_event*.c
   F:    include/linux/perf_event.h
- -F:    arch/*/kernel/perf_event.c
- -F:    arch/*/kernel/*/perf_event.c
- -F:    arch/*/kernel/*/*/perf_event.c
+ +F:    arch/*/kernel/perf_event*.c
+ +F:    arch/*/kernel/*/perf_event*.c
+ +F:    arch/*/kernel/*/*/perf_event*.c
   F:    arch/*/include/asm/perf_event.h
- -F:    arch/*/lib/perf_event.c
+ +F:    arch/*/lib/perf_event*.c
   F:    arch/*/kernel/perf_callchain.c
   F:    tools/perf/
   
@@@ -5971,6 -5965,13 +5965,13 @@@ S:    Maintaine
   F:    Documentation/filesystems/vfat.txt
   F:    fs/fat/
   
+ VIRTIO CONSOLE DRIVER
+ M:    Amit Shah <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    drivers/char/virtio_console.c
+ F:    include/linux/virtio_console.h
+ 
   VIRTIO HOST (VHOST)
   M:    "Michael S. Tsirkin" <[email protected]>
   L:    [email protected]
diff --combined arch/x86/Kconfig

index 7191b6eb16d8799277a52064c26f05de40b0bba9,9458685902bddecc3f221012389085084bfb23ef..97a95dfd118110f908e8e916897458d885a6ae94
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -58,9 -58,6 +58,9 @@@ config X8
         select HAVE_ARCH_KMEMCHECK
         select HAVE_USER_RETURN_NOTIFIER
   
+ +config INSTRUCTION_DECODER
+ +      def_bool (KPROBES || PERF_EVENTS)
+ +
   config OUTPUT_FORMAT
         string
         default "elf32-i386" if X86_32
@@@ -1219,8 -1216,8 +1219,8 @@@ config NUMA_EM
   
   config NODES_SHIFT
         int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-       range 1 9
-       default "9" if MAXSMP
+       range 1 10
+       default "10" if MAXSMP
         default "6" if X86_64
         default "4" if X86_NUMAQ
         default "3"
diff --combined arch/x86/kernel/cpu/perf_event.c

index 353a174adb4478bd7ece980169d706cb0c7143d7,db5bdc8addf82f1df406488025a5b4877ed53419..626154a9f535b28d53140b60e5b360a6a1a0db75
--- 1/arch/x86/kernel/cpu/perf_event.c
--- 2/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@@ -21,6 -21,7 +21,7 @@@
   #include <linux/kdebug.h>
   #include <linux/sched.h>
   #include <linux/uaccess.h>
+ #include <linux/slab.h>
   #include <linux/highmem.h>
   #include <linux/cpu.h>
   #include <linux/bitops.h>
@@@ -30,51 -31,46 +31,51 @@@
   #include <asm/nmi.h>
   #include <asm/compat.h>
   
- -static u64 perf_event_mask __read_mostly;
+ +#if 0
+ +#undef wrmsrl
+ +#define wrmsrl(msr, val)                                      \
+ +do {                                                          \
+ +      trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
+ +                      (unsigned long)(val));                  \
+ +      native_write_msr((msr), (u32)((u64)(val)),              \
+ +                      (u32)((u64)(val) >> 32));               \
+ +} while (0)
+ +#endif
   
- -/* The maximal number of PEBS events: */
- -#define MAX_PEBS_EVENTS       4
+ +/*
+ + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ + */
+ +static unsigned long
+ +copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+ +{
+ +      unsigned long offset, addr = (unsigned long)from;
+ +      int type = in_nmi() ? KM_NMI : KM_IRQ0;
+ +      unsigned long size, len = 0;
+ +      struct page *page;
+ +      void *map;
+ +      int ret;
   
- -/* The size of a BTS record in bytes: */
- -#define BTS_RECORD_SIZE               24
+ +      do {
+ +              ret = __get_user_pages_fast(addr, 1, 0, &page);
+ +              if (!ret)
+ +                      break;
   
- -/* The size of a per-cpu BTS buffer in bytes: */
- -#define BTS_BUFFER_SIZE               (BTS_RECORD_SIZE * 2048)
+ +              offset = addr & (PAGE_SIZE - 1);
+ +              size = min(PAGE_SIZE - offset, n - len);
   
- -/* The BTS overflow threshold in bytes from the end of the buffer: */
- -#define BTS_OVFL_TH           (BTS_RECORD_SIZE * 128)
+ +              map = kmap_atomic(page, type);
+ +              memcpy(to, map+offset, size);
+ +              kunmap_atomic(map, type);
+ +              put_page(page);
   
+ +              len  += size;
+ +              to   += size;
+ +              addr += size;
   
- -/*
- - * Bits in the debugctlmsr controlling branch tracing.
- - */
- -#define X86_DEBUGCTL_TR                       (1 << 6)
- -#define X86_DEBUGCTL_BTS              (1 << 7)
- -#define X86_DEBUGCTL_BTINT            (1 << 8)
- -#define X86_DEBUGCTL_BTS_OFF_OS               (1 << 9)
- -#define X86_DEBUGCTL_BTS_OFF_USR      (1 << 10)
+ +      } while (len < n);
   
- -/*
- - * A debug store configuration.
- - *
- - * We only support architectures that use 64bit fields.
- - */
- -struct debug_store {
- -      u64     bts_buffer_base;
- -      u64     bts_index;
- -      u64     bts_absolute_maximum;
- -      u64     bts_interrupt_threshold;
- -      u64     pebs_buffer_base;
- -      u64     pebs_index;
- -      u64     pebs_absolute_maximum;
- -      u64     pebs_interrupt_threshold;
- -      u64     pebs_event_reset[MAX_PEBS_EVENTS];
- -};
+ +      return len;
+ +}
   
   struct event_constraint {
         union {
@@@ -93,39 -89,18 +94,39 @@@ struct amd_nb 
         struct event_constraint event_constraints[X86_PMC_IDX_MAX];
   };
   
+ +#define MAX_LBR_ENTRIES               16
+ +
   struct cpu_hw_events {
+ +      /*
+ +       * Generic x86 PMC bits
+ +       */
         struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- -      unsigned long           interrupts;
         int                     enabled;
- -      struct debug_store      *ds;
   
         int                     n_events;
         int                     n_added;
         int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
         u64                     tags[X86_PMC_IDX_MAX];
         struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ +
+ +      /*
+ +       * Intel DebugStore bits
+ +       */
+ +      struct debug_store      *ds;
+ +      u64                     pebs_enabled;
+ +
+ +      /*
+ +       * Intel LBR bits
+ +       */
+ +      int                             lbr_users;
+ +      void                            *lbr_context;
+ +      struct perf_branch_stack        lbr_stack;
+ +      struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+ +
+ +      /*
+ +       * AMD specific bits
+ +       */
         struct amd_nb           *amd_nb;
   };
   
@@@ -139,31 -114,11 +140,31 @@@
   #define EVENT_CONSTRAINT(c, n, m)     \
         __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
   
+ +/*
+ + * Constraint on the Event code.
+ + */
   #define INTEL_EVENT_CONSTRAINT(c, n)  \
- -      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+ +      EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
   
+ +/*
+ + * Constraint on the Event code + UMask + fixed-mask
+ + *
+ + * filter mask to validate fixed counter events.
+ + * the following filters disqualify for fixed counters:
+ + *  - inv
+ + *  - edge
+ + *  - cnt-mask
+ + *  The other filters are supported by fixed counters.
+ + *  The any-thread option is supported starting with v3.
+ + */
   #define FIXED_EVENT_CONSTRAINT(c, n)  \
- -      EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+ +      EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+ +
+ +/*
+ + * Constraint on the Event code + UMask
+ + */
+ +#define PEBS_EVENT_CONSTRAINT(c, n)   \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
   
   #define EVENT_CONSTRAINT_END          \
         EVENT_CONSTRAINT(0, 0, 0)
@@@ -171,43 -126,32 +172,43 @@@
   #define for_each_event_constraint(e, c)       \
         for ((e) = (c); (e)->cmask; (e)++)
   
+ +union perf_capabilities {
+ +      struct {
+ +              u64     lbr_format    : 6;
+ +              u64     pebs_trap     : 1;
+ +              u64     pebs_arch_reg : 1;
+ +              u64     pebs_format   : 4;
+ +              u64     smm_freeze    : 1;
+ +      };
+ +      u64     capabilities;
+ +};
+ +
   /*
    * struct x86_pmu - generic x86 pmu
    */
   struct x86_pmu {
+ +      /*
+ +       * Generic x86 PMC bits
+ +       */
         const char      *name;
         int             version;
         int             (*handle_irq)(struct pt_regs *);
         void            (*disable_all)(void);
- -      void            (*enable_all)(void);
+ +      void            (*enable_all)(int added);
         void            (*enable)(struct perf_event *);
         void            (*disable)(struct perf_event *);
+ +      int             (*hw_config)(struct perf_event *event);
+ +      int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
         unsigned        eventsel;
         unsigned        perfctr;
         u64             (*event_map)(int);
- -      u64             (*raw_event)(u64);
         int             max_events;
- -      int             num_events;
- -      int             num_events_fixed;
- -      int             event_bits;
- -      u64             event_mask;
+ +      int             num_counters;
+ +      int             num_counters_fixed;
+ +      int             cntval_bits;
+ +      u64             cntval_mask;
         int             apic;
         u64             max_period;
- -      u64             intel_ctrl;
- -      void            (*enable_bts)(u64 config);
- -      void            (*disable_bts)(void);
- -
         struct event_constraint *
                         (*get_event_constraints)(struct cpu_hw_events *cpuc,
                                                  struct perf_event *event);
@@@ -215,32 -159,11 +216,32 @@@
         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                  struct perf_event *event);
         struct event_constraint *event_constraints;
+ +      void            (*quirks)(void);
   
         int             (*cpu_prepare)(int cpu);
         void            (*cpu_starting)(int cpu);
         void            (*cpu_dying)(int cpu);
         void            (*cpu_dead)(int cpu);
+ +
+ +      /*
+ +       * Intel Arch Perfmon v2+
+ +       */
+ +      u64                     intel_ctrl;
+ +      union perf_capabilities intel_cap;
+ +
+ +      /*
+ +       * Intel DebugStore bits
+ +       */
+ +      int             bts, pebs;
+ +      int             pebs_record_size;
+ +      void            (*drain_pebs)(struct pt_regs *regs);
+ +      struct event_constraint *pebs_constraints;
+ +
+ +      /*
+ +       * Intel LBR
+ +       */
+ +      unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+ +      int             lbr_nr;                    /* hardware stack size */
   };
   
   static struct x86_pmu x86_pmu __read_mostly;
@@@ -275,7 -198,7 +276,7 @@@ static u6
   x86_perf_event_update(struct perf_event *event)
   {
         struct hw_perf_event *hwc = &event->hw;
- -      int shift = 64 - x86_pmu.event_bits;
+ +      int shift = 64 - x86_pmu.cntval_bits;
         u64 prev_raw_count, new_raw_count;
         int idx = hwc->idx;
         s64 delta;
@@@ -318,32 -241,33 +319,32 @@@ again
   static atomic_t active_events;
   static DEFINE_MUTEX(pmc_reserve_mutex);
   
+ +#ifdef CONFIG_X86_LOCAL_APIC
+ +
   static bool reserve_pmc_hardware(void)
   {
- -#ifdef CONFIG_X86_LOCAL_APIC
         int i;
   
         if (nmi_watchdog == NMI_LOCAL_APIC)
                 disable_lapic_nmi_watchdog();
   
- -      for (i = 0; i < x86_pmu.num_events; i++) {
+ +      for (i = 0; i < x86_pmu.num_counters; i++) {
                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                         goto perfctr_fail;
         }
   
- -      for (i = 0; i < x86_pmu.num_events; i++) {
+ +      for (i = 0; i < x86_pmu.num_counters; i++) {
                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
                         goto eventsel_fail;
         }
- -#endif
   
         return true;
   
- -#ifdef CONFIG_X86_LOCAL_APIC
   eventsel_fail:
         for (i--; i >= 0; i--)
                 release_evntsel_nmi(x86_pmu.eventsel + i);
   
- -      i = x86_pmu.num_events;
+ +      i = x86_pmu.num_counters;
   
   perfctr_fail:
         for (i--; i >= 0; i--)
@@@ -353,36 -277,128 +354,36 @@@
                 enable_lapic_nmi_watchdog();
   
         return false;
- -#endif
   }
   
   static void release_pmc_hardware(void)
   {
- -#ifdef CONFIG_X86_LOCAL_APIC
         int i;
   
- -      for (i = 0; i < x86_pmu.num_events; i++) {
+ +      for (i = 0; i < x86_pmu.num_counters; i++) {
                 release_perfctr_nmi(x86_pmu.perfctr + i);
                 release_evntsel_nmi(x86_pmu.eventsel + i);
         }
   
         if (nmi_watchdog == NMI_LOCAL_APIC)
                 enable_lapic_nmi_watchdog();
- -#endif
- -}
- -
- -static inline bool bts_available(void)
- -{
- -      return x86_pmu.enable_bts != NULL;
- -}
- -
- -static void init_debug_store_on_cpu(int cpu)
- -{
- -      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- -
- -      if (!ds)
- -              return;
- -
- -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
- -                   (u32)((u64)(unsigned long)ds),
- -                   (u32)((u64)(unsigned long)ds >> 32));
- -}
- -
- -static void fini_debug_store_on_cpu(int cpu)
- -{
- -      if (!per_cpu(cpu_hw_events, cpu).ds)
- -              return;
- -
- -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
- -}
- -
- -static void release_bts_hardware(void)
- -{
- -      int cpu;
- -
- -      if (!bts_available())
- -              return;
- -
- -      get_online_cpus();
- -
- -      for_each_online_cpu(cpu)
- -              fini_debug_store_on_cpu(cpu);
- -
- -      for_each_possible_cpu(cpu) {
- -              struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- -
- -              if (!ds)
- -                      continue;
- -
- -              per_cpu(cpu_hw_events, cpu).ds = NULL;
- -
- -              kfree((void *)(unsigned long)ds->bts_buffer_base);
- -              kfree(ds);
- -      }
- -
- -      put_online_cpus();
   }
   
- -static int reserve_bts_hardware(void)
- -{
- -      int cpu, err = 0;
- -
- -      if (!bts_available())
- -              return 0;
- -
- -      get_online_cpus();
- -
- -      for_each_possible_cpu(cpu) {
- -              struct debug_store *ds;
- -              void *buffer;
- -
- -              err = -ENOMEM;
- -              buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
- -              if (unlikely(!buffer))
- -                      break;
- -
- -              ds = kzalloc(sizeof(*ds), GFP_KERNEL);
- -              if (unlikely(!ds)) {
- -                      kfree(buffer);
- -                      break;
- -              }
+ +#else
   
- -              ds->bts_buffer_base = (u64)(unsigned long)buffer;
- -              ds->bts_index = ds->bts_buffer_base;
- -              ds->bts_absolute_maximum =
- -                      ds->bts_buffer_base + BTS_BUFFER_SIZE;
- -              ds->bts_interrupt_threshold =
- -                      ds->bts_absolute_maximum - BTS_OVFL_TH;
+ +static bool reserve_pmc_hardware(void) { return true; }
+ +static void release_pmc_hardware(void) {}
   
- -              per_cpu(cpu_hw_events, cpu).ds = ds;
- -              err = 0;
- -      }
- -
- -      if (err)
- -              release_bts_hardware();
- -      else {
- -              for_each_online_cpu(cpu)
- -                      init_debug_store_on_cpu(cpu);
- -      }
- -
- -      put_online_cpus();
+ +#endif
   
- -      return err;
- -}
+ +static int reserve_ds_buffers(void);
+ +static void release_ds_buffers(void);
   
   static void hw_perf_event_destroy(struct perf_event *event)
   {
         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
                 release_pmc_hardware();
- -              release_bts_hardware();
+ +              release_ds_buffers();
                 mutex_unlock(&pmc_reserve_mutex);
         }
   }
@@@ -425,28 -441,6 +426,28 @@@ set_ext_hw_attr(struct hw_perf_event *h
         return 0;
   }
   
+ +static int x86_pmu_hw_config(struct perf_event *event)
+ +{
+ +      /*
+ +       * Generate PMC IRQs:
+ +       * (keep 'enabled' bit clear for now)
+ +       */
+ +      event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+ +
+ +      /*
+ +       * Count user and OS events unless requested not to
+ +       */
+ +      if (!event->attr.exclude_user)
+ +              event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+ +      if (!event->attr.exclude_kernel)
+ +              event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+ +
+ +      if (event->attr.type == PERF_TYPE_RAW)
+ +              event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+ +
+ +      return 0;
+ +}
+ +
   /*
    * Setup the hardware configuration for a given attr_type
    */
@@@ -466,11 -460,8 +467,11 @@@ static int __hw_perf_event_init(struct 
                 if (atomic_read(&active_events) == 0) {
                         if (!reserve_pmc_hardware())
                                 err = -EBUSY;
- -                      else
- -                              err = reserve_bts_hardware();
+ +                      else {
+ +                              err = reserve_ds_buffers();
+ +                              if (err)
+ +                                      release_pmc_hardware();
+ +                      }
                 }
                 if (!err)
                         atomic_inc(&active_events);
@@@ -481,14 -472,23 +482,14 @@@
   
         event->destroy = hw_perf_event_destroy;
   
- -      /*
- -       * Generate PMC IRQs:
- -       * (keep 'enabled' bit clear for now)
- -       */
- -      hwc->config = ARCH_PERFMON_EVENTSEL_INT;
- -
         hwc->idx = -1;
         hwc->last_cpu = -1;
         hwc->last_tag = ~0ULL;
   
- -      /*
- -       * Count user and OS events unless requested not to.
- -       */
- -      if (!attr->exclude_user)
- -              hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
- -      if (!attr->exclude_kernel)
- -              hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+ +      /* Processor specifics */
+ +      err = x86_pmu.hw_config(event);
+ +      if (err)
+ +              return err;
   
         if (!hwc->sample_period) {
                 hwc->sample_period = x86_pmu.max_period;
@@@ -505,8 -505,16 +506,8 @@@
                         return -EOPNOTSUPP;
         }
   
- -      /*
- -       * Raw hw_event type provide the config in the hw_event structure
- -       */
- -      if (attr->type == PERF_TYPE_RAW) {
- -              hwc->config |= x86_pmu.raw_event(attr->config);
- -              if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
- -                  perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- -                      return -EACCES;
+ +      if (attr->type == PERF_TYPE_RAW)
                 return 0;
- -      }
   
         if (attr->type == PERF_TYPE_HW_CACHE)
                 return set_ext_hw_attr(hwc, attr);
@@@ -531,11 -539,11 +532,11 @@@
         if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
             (hwc->sample_period == 1)) {
                 /* BTS is not supported by this architecture. */
- -              if (!bts_available())
+ +              if (!x86_pmu.bts)
                         return -EOPNOTSUPP;
   
                 /* BTS is currently only allowed for user-mode. */
- -              if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ +              if (!attr->exclude_kernel)
                         return -EOPNOTSUPP;
         }
   
@@@ -549,7 -557,7 +550,7 @@@ static void x86_pmu_disable_all(void
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         int idx;
   
- -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                 u64 val;
   
                 if (!test_bit(idx, cpuc->active_mask))
@@@ -579,12 -587,12 +580,12 @@@ void hw_perf_disable(void
         x86_pmu.disable_all();
   }
   
- -static void x86_pmu_enable_all(void)
+ +static void x86_pmu_enable_all(int added)
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         int idx;
   
- -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                 struct perf_event *event = cpuc->events[idx];
                 u64 val;
   
@@@ -659,14 -667,14 +660,14 @@@ static int x86_schedule_events(struct c
          * assign events to counters starting with most
          * constrained events.
          */
- -      wmax = x86_pmu.num_events;
+ +      wmax = x86_pmu.num_counters;
   
         /*
          * when fixed event counters are present,
          * wmax is incremented by 1 to account
          * for one more choice
          */
- -      if (x86_pmu.num_events_fixed)
+ +      if (x86_pmu.num_counters_fixed)
                 wmax++;
   
         for (w = 1, num = n; num && w <= wmax; w++) {
@@@ -716,7 -724,7 +717,7 @@@ static int collect_events(struct cpu_hw
         struct perf_event *event;
         int n, max_count;
   
- -      max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+ +      max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
   
         /* current number of events already accepted */
         n = cpuc->n_events;
@@@ -787,7 -795,7 +788,7 @@@ void hw_perf_enable(void
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct perf_event *event;
         struct hw_perf_event *hwc;
- -      int i;
+ +      int i, added = cpuc->n_added;
   
         if (!x86_pmu_initialized())
                 return;
@@@ -839,20 -847,19 +840,20 @@@
         cpuc->enabled = 1;
         barrier();
   
- -      x86_pmu.enable_all();
+ +      x86_pmu.enable_all(added);
   }
   
   static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
   {
- -      (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ +      wrmsrl(hwc->config_base + hwc->idx,
                               hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
   }
   
   static inline void x86_pmu_disable_event(struct perf_event *event)
   {
         struct hw_perf_event *hwc = &event->hw;
- -      (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ +
+ +      wrmsrl(hwc->config_base + hwc->idx, hwc->config);
   }
   
   static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@@ -867,7 -874,7 +868,7 @@@ x86_perf_event_set_period(struct perf_e
         struct hw_perf_event *hwc = &event->hw;
         s64 left = atomic64_read(&hwc->period_left);
         s64 period = hwc->sample_period;
- -      int err, ret = 0, idx = hwc->idx;
+ +      int ret = 0, idx = hwc->idx;
   
         if (idx == X86_PMC_IDX_FIXED_BTS)
                 return 0;
@@@ -905,8 -912,8 +906,8 @@@
          */
         atomic64_set(&hwc->prev_count, (u64)-left);
   
- -      err = checking_wrmsrl(hwc->event_base + idx,
- -                           (u64)(-left) & x86_pmu.event_mask);
+ +      wrmsrl(hwc->event_base + idx,
+ +                      (u64)(-left) & x86_pmu.cntval_mask);
   
         perf_event_update_userpage(event);
   
@@@ -943,7 -950,7 +944,7 @@@ static int x86_pmu_enable(struct perf_e
         if (n < 0)
                 return n;
   
- -      ret = x86_schedule_events(cpuc, n, assign);
+ +      ret = x86_pmu.schedule_events(cpuc, n, assign);
         if (ret)
                 return ret;
         /*
@@@ -984,12 -991,11 +985,12 @@@ static void x86_pmu_unthrottle(struct p
   void perf_event_print_debug(void)
   {
         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+ +      u64 pebs;
         struct cpu_hw_events *cpuc;
         unsigned long flags;
         int cpu, idx;
   
- -      if (!x86_pmu.num_events)
+ +      if (!x86_pmu.num_counters)
                 return;
   
         local_irq_save(flags);
@@@ -1002,18 -1008,16 +1003,18 @@@
                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+ +              rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
   
                 pr_info("\n");
                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+ +              pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
         }
- -      pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
+ +      pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
   
- -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
   
@@@ -1026,7 -1030,7 +1027,7 @@@
                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
                         cpu, idx, prev_left);
         }
- -      for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
   
                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@@ -1091,7 -1095,7 +1092,7 @@@ static int x86_pmu_handle_irq(struct pt
   
         cpuc = &__get_cpu_var(cpu_hw_events);
   
- -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                 if (!test_bit(idx, cpuc->active_mask))
                         continue;
   
@@@ -1099,7 -1103,7 +1100,7 @@@
                 hwc = &event->hw;
   
                 val = x86_perf_event_update(event);
- -              if (val & (1ULL << (x86_pmu.event_bits - 1)))
+ +              if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
                         continue;
   
                 /*
@@@ -1142,6 -1146,7 +1143,6 @@@ void set_perf_event_pending(void
   
   void perf_events_lapic_init(void)
   {
- -#ifdef CONFIG_X86_LOCAL_APIC
         if (!x86_pmu.apic || !x86_pmu_initialized())
                 return;
   
@@@ -1149,6 -1154,7 +1150,6 @@@
          * Always use NMI for PMU
          */
         apic_write(APIC_LVTPC, APIC_DM_NMI);
- -#endif
   }
   
   static int __kprobes
@@@ -1172,7 -1178,9 +1173,7 @@@ perf_event_nmi_handler(struct notifier_
   
         regs = args->regs;
   
- -#ifdef CONFIG_X86_LOCAL_APIC
         apic_write(APIC_LVTPC, APIC_DM_NMI);
- -#endif
         /*
          * Can't rely on the handled return value to say it was our NMI, two
          * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@@ -1266,15 -1274,12 +1267,15 @@@ int hw_perf_group_sched_in(struct perf_
         int assign[X86_PMC_IDX_MAX];
         int n0, n1, ret;
   
+ +      if (!x86_pmu_initialized())
+ +              return 0;
+ +
         /* n0 = total number of events */
         n0 = collect_events(cpuc, leader, true);
         if (n0 < 0)
                 return n0;
   
- -      ret = x86_schedule_events(cpuc, n0, assign);
+ +      ret = x86_pmu.schedule_events(cpuc, n0, assign);
         if (ret)
                 return ret;
   
@@@ -1324,9 -1329,6 +1325,9 @@@ undo
   
   #include "perf_event_amd.c"
   #include "perf_event_p6.c"
+ +#include "perf_event_p4.c"
+ +#include "perf_event_intel_lbr.c"
+ +#include "perf_event_intel_ds.c"
   #include "perf_event_intel.c"
   
   static int __cpuinit
@@@ -1400,50 -1402,48 +1401,50 @@@ void __init init_hw_perf_events(void
   
         pr_cont("%s PMU driver.\n", x86_pmu.name);
   
- -      if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+ +      if (x86_pmu.quirks)
+ +              x86_pmu.quirks();
+ +
+ +      if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- -                   x86_pmu.num_events, X86_PMC_MAX_GENERIC);
- -              x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+ +                   x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+ +              x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
         }
- -      perf_event_mask = (1 << x86_pmu.num_events) - 1;
- -      perf_max_events = x86_pmu.num_events;
+ +      x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+ +      perf_max_events = x86_pmu.num_counters;
   
- -      if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+ +      if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- -                   x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
- -              x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+ +                   x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+ +              x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
         }
   
- -      perf_event_mask |=
- -              ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
- -      x86_pmu.intel_ctrl = perf_event_mask;
+ +      x86_pmu.intel_ctrl |=
+ +              ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
   
         perf_events_lapic_init();
         register_die_notifier(&perf_event_nmi_notifier);
   
         unconstrained = (struct event_constraint)
- -              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
- -                                 0, x86_pmu.num_events);
+ +              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+ +                                 0, x86_pmu.num_counters);
   
         if (x86_pmu.event_constraints) {
                 for_each_event_constraint(c, x86_pmu.event_constraints) {
- -                      if (c->cmask != INTEL_ARCH_FIXED_MASK)
+ +                      if (c->cmask != X86_RAW_EVENT_MASK)
                                 continue;
   
- -                      c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
- -                      c->weight += x86_pmu.num_events;
+ +                      c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ +                      c->weight += x86_pmu.num_counters;
                 }
         }
   
         pr_info("... version:                %d\n",     x86_pmu.version);
- -      pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
- -      pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
- -      pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+ +      pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+ +      pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+ +      pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
- -      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
- -      pr_info("... event mask:             %016Lx\n", perf_event_mask);
+ +      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
+ +      pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
   
         perf_cpu_notifier(x86_pmu_notifier);
   }
@@@ -1462,32 -1462,6 +1463,32 @@@ static const struct pmu pmu = 
         .unthrottle     = x86_pmu_unthrottle,
   };
   
+ +/*
+ + * validate that we can schedule this event
+ + */
+ +static int validate_event(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *fake_cpuc;
+ +      struct event_constraint *c;
+ +      int ret = 0;
+ +
+ +      fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+ +      if (!fake_cpuc)
+ +              return -ENOMEM;
+ +
+ +      c = x86_pmu.get_event_constraints(fake_cpuc, event);
+ +
+ +      if (!c || !c->weight)
+ +              ret = -ENOSPC;
+ +
+ +      if (x86_pmu.put_event_constraints)
+ +              x86_pmu.put_event_constraints(fake_cpuc, event);
+ +
+ +      kfree(fake_cpuc);
+ +
+ +      return ret;
+ +}
+ +
   /*
    * validate a single event group
    *
@@@ -1528,7 -1502,7 +1529,7 @@@ static int validate_group(struct perf_e
   
         fake_cpuc->n_events = n;
   
- -      ret = x86_schedule_events(fake_cpuc, n, NULL);
+ +      ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
   
   out_free:
         kfree(fake_cpuc);
@@@ -1553,8 -1527,6 +1554,8 @@@ const struct pmu *hw_perf_event_init(st
   
                 if (event->group_leader != event)
                         err = validate_group(event);
+ +              else
+ +                      err = validate_event(event);
   
                 event->pmu = tmp;
         }
@@@ -1602,7 -1574,8 +1603,7 @@@ static void backtrace_address(void *dat
   {
         struct perf_callchain_entry *entry = data;
   
- -      if (reliable)
- -              callchain_store(entry, addr);
+ +      callchain_store(entry, addr);
   }
   
   static const struct stacktrace_ops backtrace_ops = {
@@@ -1624,6 -1597,41 +1625,6 @@@ perf_callchain_kernel(struct pt_regs *r
         dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
   }
   
- -/*
- - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- - */
- -static unsigned long
- -copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
- -{
- -      unsigned long offset, addr = (unsigned long)from;
- -      int type = in_nmi() ? KM_NMI : KM_IRQ0;
- -      unsigned long size, len = 0;
- -      struct page *page;
- -      void *map;
- -      int ret;
- -
- -      do {
- -              ret = __get_user_pages_fast(addr, 1, 0, &page);
- -              if (!ret)
- -                      break;
- -
- -              offset = addr & (PAGE_SIZE - 1);
- -              size = min(PAGE_SIZE - offset, n - len);
- -
- -              map = kmap_atomic(page, type);
- -              memcpy(to, map+offset, size);
- -              kunmap_atomic(map, type);
- -              put_page(page);
- -
- -              len  += size;
- -              to   += size;
- -              addr += size;
- -
- -      } while (len < n);
- -
- -      return len;
- -}
- -
   #ifdef CONFIG_COMPAT
   static inline int
   perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
diff --combined arch/x86/kernel/cpu/perf_event_intel.c

index f168b4030d406dbb7a84f8c873501b088c8dc823,9c794ac87837622a515094c34cd5b1eb070241b6..a099df96f9168000720480c96f0aaf5e1b3a80b3
--- 1/arch/x86/kernel/cpu/perf_event_intel.c
--- 2/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@@ -88,7 -88,7 +88,7 @@@ static u64 intel_pmu_event_map(int hw_e
         return intel_perfmon_event_map[hw_event];
   }
   
- -static __initconst u64 westmere_hw_cache_event_ids
+ +static __initconst const u64 westmere_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@@ -179,7 -179,7 +179,7 @@@
    },
   };
   
- -static __initconst u64 nehalem_hw_cache_event_ids
+ +static __initconst const u64 nehalem_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@@ -270,7 -270,7 +270,7 @@@
    },
   };
   
- -static __initconst u64 core2_hw_cache_event_ids
+ +static __initconst const u64 core2_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@@ -361,7 -361,7 +361,7 @@@
    },
   };
   
- -static __initconst u64 atom_hw_cache_event_ids
+ +static __initconst const u64 atom_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@@ -452,6 -452,60 +452,6 @@@
    },
   };
   
- -static u64 intel_pmu_raw_event(u64 hw_event)
- -{
- -#define CORE_EVNTSEL_EVENT_MASK               0x000000FFULL
- -#define CORE_EVNTSEL_UNIT_MASK                0x0000FF00ULL
- -#define CORE_EVNTSEL_EDGE_MASK                0x00040000ULL
- -#define CORE_EVNTSEL_INV_MASK         0x00800000ULL
- -#define CORE_EVNTSEL_REG_MASK         0xFF000000ULL
- -
- -#define CORE_EVNTSEL_MASK             \
- -      (INTEL_ARCH_EVTSEL_MASK |       \
- -       INTEL_ARCH_UNIT_MASK   |       \
- -       INTEL_ARCH_EDGE_MASK   |       \
- -       INTEL_ARCH_INV_MASK    |       \
- -       INTEL_ARCH_CNT_MASK)
- -
- -      return hw_event & CORE_EVNTSEL_MASK;
- -}
- -
- -static void intel_pmu_enable_bts(u64 config)
- -{
- -      unsigned long debugctlmsr;
- -
- -      debugctlmsr = get_debugctlmsr();
- -
- -      debugctlmsr |= X86_DEBUGCTL_TR;
- -      debugctlmsr |= X86_DEBUGCTL_BTS;
- -      debugctlmsr |= X86_DEBUGCTL_BTINT;
- -
- -      if (!(config & ARCH_PERFMON_EVENTSEL_OS))
- -              debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
- -
- -      if (!(config & ARCH_PERFMON_EVENTSEL_USR))
- -              debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
- -
- -      update_debugctlmsr(debugctlmsr);
- -}
- -
- -static void intel_pmu_disable_bts(void)
- -{
- -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- -      unsigned long debugctlmsr;
- -
- -      if (!cpuc->ds)
- -              return;
- -
- -      debugctlmsr = get_debugctlmsr();
- -
- -      debugctlmsr &=
- -              ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
- -                X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
- -
- -      update_debugctlmsr(debugctlmsr);
- -}
- -
   static void intel_pmu_disable_all(void)
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@@ -460,17 -514,12 +460,17 @@@
   
         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
                 intel_pmu_disable_bts();
+ +
+ +      intel_pmu_pebs_disable_all();
+ +      intel_pmu_lbr_disable_all();
   }
   
- -static void intel_pmu_enable_all(void)
+ +static void intel_pmu_enable_all(int added)
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
   
+ +      intel_pmu_pebs_enable_all();
+ +      intel_pmu_lbr_enable_all();
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
   
         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@@ -484,41 -533,6 +484,41 @@@
         }
   }
   
+ +/*
+ + * Workaround for:
+ + *   Intel Errata AAK100 (model 26)
+ + *   Intel Errata AAP53  (model 30)
+ + *   Intel Errata BD53   (model 44)
+ + *
+ + * These chips need to be 'reset' when adding counters by programming
+ + * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
+ + * either in sequence on the same PMC or on different PMCs.
+ + */
+ +static void intel_pmu_nhm_enable_all(int added)
+ +{
+ +      if (added) {
+ +              struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +              int i;
+ +
+ +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
+ +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
+ +              wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+ +
+ +              wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+ +              wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ +
+ +              for (i = 0; i < 3; i++) {
+ +                      struct perf_event *event = cpuc->events[i];
+ +
+ +                      if (!event)
+ +                              continue;
+ +
+ +                      __x86_pmu_enable_event(&event->hw);
+ +              }
+ +      }
+ +      intel_pmu_enable_all(added);
+ +}
+ +
   static inline u64 intel_pmu_get_status(void)
   {
         u64 status;
@@@ -533,7 -547,8 +533,7 @@@ static inline void intel_pmu_ack_status
         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
   }
   
- -static inline void
- -intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+ +static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
   {
         int idx = hwc->idx - X86_PMC_IDX_FIXED;
         u64 ctrl_val, mask;
@@@ -542,10 -557,71 +542,10 @@@
   
         rdmsrl(hwc->config_base, ctrl_val);
         ctrl_val &= ~mask;
- -      (void)checking_wrmsrl(hwc->config_base, ctrl_val);
- -}
- -
- -static void intel_pmu_drain_bts_buffer(void)
- -{
- -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- -      struct debug_store *ds = cpuc->ds;
- -      struct bts_record {
- -              u64     from;
- -              u64     to;
- -              u64     flags;
- -      };
- -      struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
- -      struct bts_record *at, *top;
- -      struct perf_output_handle handle;
- -      struct perf_event_header header;
- -      struct perf_sample_data data;
- -      struct pt_regs regs;
- -
- -      if (!event)
- -              return;
- -
- -      if (!ds)
- -              return;
- -
- -      at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
- -      top = (struct bts_record *)(unsigned long)ds->bts_index;
- -
- -      if (top <= at)
- -              return;
- -
- -      ds->bts_index = ds->bts_buffer_base;
- -
- -      perf_sample_data_init(&data, 0);
- -
- -      data.period     = event->hw.last_period;
- -      regs.ip         = 0;
- -
- -      /*
- -       * Prepare a generic sample, i.e. fill in the invariant fields.
- -       * We will overwrite the from and to address before we output
- -       * the sample.
- -       */
- -      perf_prepare_sample(&header, &data, event, &regs);
- -
- -      if (perf_output_begin(&handle, event,
- -                            header.size * (top - at), 1, 1))
- -              return;
- -
- -      for (; at < top; at++) {
- -              data.ip         = at->from;
- -              data.addr       = at->to;
- -
- -              perf_output_sample(&handle, &header, &data, event);
- -      }
- -
- -      perf_output_end(&handle);
- -
- -      /* There's new data available. */
- -      event->hw.interrupts++;
- -      event->pending_kill = POLL_IN;
+ +      wrmsrl(hwc->config_base, ctrl_val);
   }
   
- -static inline void
- -intel_pmu_disable_event(struct perf_event *event)
+ +static void intel_pmu_disable_event(struct perf_event *event)
   {
         struct hw_perf_event *hwc = &event->hw;
   
@@@ -561,15 -637,14 +561,15 @@@
         }
   
         x86_pmu_disable_event(event);
+ +
+ +      if (unlikely(event->attr.precise))
+ +              intel_pmu_pebs_disable(event);
   }
   
- -static inline void
- -intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+ +static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
   {
         int idx = hwc->idx - X86_PMC_IDX_FIXED;
         u64 ctrl_val, bits, mask;
- -      int err;
   
         /*
          * Enable IRQ generation (0x8),
@@@ -594,7 -669,7 +594,7 @@@
         rdmsrl(hwc->config_base, ctrl_val);
         ctrl_val &= ~mask;
         ctrl_val |= bits;
- -      err = checking_wrmsrl(hwc->config_base, ctrl_val);
+ +      wrmsrl(hwc->config_base, ctrl_val);
   }
   
   static void intel_pmu_enable_event(struct perf_event *event)
@@@ -614,9 -689,6 +614,9 @@@
                 return;
         }
   
+ +      if (unlikely(event->attr.precise))
+ +              intel_pmu_pebs_enable(event);
+ +
         __x86_pmu_enable_event(hwc);
   }
   
@@@ -636,20 -708,20 +636,20 @@@ static void intel_pmu_reset(void
         unsigned long flags;
         int idx;
   
- -      if (!x86_pmu.num_events)
+ +      if (!x86_pmu.num_counters)
                 return;
   
         local_irq_save(flags);
   
         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
   
- -      for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
         }
- -      for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+ +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
- -      }
+ +
         if (ds)
                 ds->bts_index = ds->bts_buffer_base;
   
@@@ -675,7 -747,7 +675,7 @@@ static int intel_pmu_handle_irq(struct 
         intel_pmu_drain_bts_buffer();
         status = intel_pmu_get_status();
         if (!status) {
- -              intel_pmu_enable_all();
+ +              intel_pmu_enable_all(0);
                 return 0;
         }
   
@@@ -690,15 -762,6 +690,15 @@@ again
   
         inc_irq_stat(apic_perf_irqs);
         ack = status;
+ +
+ +      intel_pmu_lbr_read();
+ +
+ +      /*
+ +       * PEBS overflow sets bit 62 in the global status register
+ +       */
+ +      if (__test_and_clear_bit(62, (unsigned long *)&status))
+ +              x86_pmu.drain_pebs(regs);
+ +
         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                 struct perf_event *event = cpuc->events[bit];
   
@@@ -724,22 -787,26 +724,22 @@@
                 goto again;
   
   done:
- -      intel_pmu_enable_all();
+ +      intel_pmu_enable_all(0);
         return 1;
   }
   
- -static struct event_constraint bts_constraint =
- -      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
- -
   static struct event_constraint *
- -intel_special_constraints(struct perf_event *event)
+ +intel_bts_constraints(struct perf_event *event)
   {
- -      unsigned int hw_event;
- -
- -      hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+ +      struct hw_perf_event *hwc = &event->hw;
+ +      unsigned int hw_event, bts_event;
   
- -      if (unlikely((hw_event ==
- -                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
- -                   (event->hw.sample_period == 1))) {
+ +      hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+ +      bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
   
+ +      if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
                 return &bts_constraint;
- -      }
+ +
         return NULL;
   }
   
@@@ -748,53 -815,24 +748,53 @@@ intel_get_event_constraints(struct cpu_
   {
         struct event_constraint *c;
   
- -      c = intel_special_constraints(event);
+ +      c = intel_bts_constraints(event);
+ +      if (c)
+ +              return c;
+ +
+ +      c = intel_pebs_constraints(event);
         if (c)
                 return c;
   
         return x86_get_event_constraints(cpuc, event);
   }
   
- -static __initconst struct x86_pmu core_pmu = {
+ +static int intel_pmu_hw_config(struct perf_event *event)
+ +{
+ +      int ret = x86_pmu_hw_config(event);
+ +
+ +      if (ret)
+ +              return ret;
+ +
+ +      if (event->attr.type != PERF_TYPE_RAW)
+ +              return 0;
+ +
+ +      if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+ +              return 0;
+ +
+ +      if (x86_pmu.version < 3)
+ +              return -EINVAL;
+ +
+ +      if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ +              return -EACCES;
+ +
+ +      event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+ +
+ +      return 0;
+ +}
+ +
+ +static __initconst const struct x86_pmu core_pmu = {
         .name                   = "core",
         .handle_irq             = x86_pmu_handle_irq,
         .disable_all            = x86_pmu_disable_all,
         .enable_all             = x86_pmu_enable_all,
         .enable                 = x86_pmu_enable_event,
         .disable                = x86_pmu_disable_event,
+ +      .hw_config              = x86_pmu_hw_config,
+ +      .schedule_events        = x86_schedule_events,
         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
         .event_map              = intel_pmu_event_map,
- -      .raw_event              = intel_pmu_raw_event,
         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
         .apic                   = 1,
         /*
@@@ -807,32 -845,17 +807,32 @@@
         .event_constraints      = intel_core_event_constraints,
   };
   
- -static __initconst struct x86_pmu intel_pmu = {
+ +static void intel_pmu_cpu_starting(int cpu)
+ +{
+ +      init_debug_store_on_cpu(cpu);
+ +      /*
+ +       * Deal with CPUs that don't clear their LBRs on power-up.
+ +       */
+ +      intel_pmu_lbr_reset();
+ +}
+ +
+ +static void intel_pmu_cpu_dying(int cpu)
+ +{
+ +      fini_debug_store_on_cpu(cpu);
+ +}
+ +
+ +static __initconst const struct x86_pmu intel_pmu = {
         .name                   = "Intel",
         .handle_irq             = intel_pmu_handle_irq,
         .disable_all            = intel_pmu_disable_all,
         .enable_all             = intel_pmu_enable_all,
         .enable                 = intel_pmu_enable_event,
         .disable                = intel_pmu_disable_event,
+ +      .hw_config              = intel_pmu_hw_config,
+ +      .schedule_events        = x86_schedule_events,
         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
         .event_map              = intel_pmu_event_map,
- -      .raw_event              = intel_pmu_raw_event,
         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
         .apic                   = 1,
         /*
@@@ -841,38 -864,14 +841,38 @@@
          * the generic event period:
          */
         .max_period             = (1ULL << 31) - 1,
- -      .enable_bts             = intel_pmu_enable_bts,
- -      .disable_bts            = intel_pmu_disable_bts,
         .get_event_constraints  = intel_get_event_constraints,
   
- -      .cpu_starting           = init_debug_store_on_cpu,
- -      .cpu_dying              = fini_debug_store_on_cpu,
+ +      .cpu_starting           = intel_pmu_cpu_starting,
+ +      .cpu_dying              = intel_pmu_cpu_dying,
   };
   
+ +static void intel_clovertown_quirks(void)
+ +{
+ +      /*
+ +       * PEBS is unreliable due to:
+ +       *
+ +       *   AJ67  - PEBS may experience CPL leaks
+ +       *   AJ68  - PEBS PMI may be delayed by one event
+ +       *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] is set
+ +       *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
+ +       *
+ +       * AJ67 could be worked around by restricting the OS/USR flags.
+ +       * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
+ +       *
+ +       * AJ106 could possibly be worked around by not allowing LBR
+ +       *       usage from PEBS, including the fixup.
+ +       * AJ68  could possibly be worked around by always programming
+ +       *       a pebs_event_reset[0] value and coping with the lost events.
+ +       *
+ +       * But taken together it might just make sense to not enable PEBS on
+ +       * these chips.
+ +       */
+ +      printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+ +      x86_pmu.pebs = 0;
+ +      x86_pmu.pebs_constraints = NULL;
+ +}
+ +
   static __init int intel_pmu_init(void)
   {
         union cpuid10_edx edx;
@@@ -882,13 -881,12 +882,13 @@@
         int version;
   
         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- -              /* check for P6 processor family */
- -         if (boot_cpu_data.x86 == 6) {
- -              return p6_pmu_init();
- -         } else {
+ +              switch (boot_cpu_data.x86) {
+ +              case 0x6:
+ +                      return p6_pmu_init();
+ +              case 0xf:
+ +                      return p4_pmu_init();
+ +              }
                 return -ENODEV;
- -         }
         }
   
         /*
@@@ -906,28 -904,16 +906,28 @@@
                 x86_pmu = intel_pmu;
   
         x86_pmu.version                 = version;
- -      x86_pmu.num_events              = eax.split.num_events;
- -      x86_pmu.event_bits              = eax.split.bit_width;
- -      x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
+ +      x86_pmu.num_counters            = eax.split.num_counters;
+ +      x86_pmu.cntval_bits             = eax.split.bit_width;
+ +      x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
   
         /*
          * Quirk: v2 perfmon does not report fixed-purpose events, so
          * assume at least 3 events:
          */
         if (version > 1)
- -              x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+ +              x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+ +
+ +      /*
+ +       * v2 and above have a perf capabilities MSR
+ +       */
+ +      if (version > 1) {
+ +              u64 capabilities;
+ +
+ +              rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+ +              x86_pmu.intel_cap.capabilities = capabilities;
+ +      }
+ +
+ +      intel_ds_init();
   
         /*
          * Install the hw-cache-events table:
@@@ -938,37 -924,29 +938,38 @@@
                 break;
   
         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+ +              x86_pmu.quirks = intel_clovertown_quirks;
         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
         case 29: /* six-core 45 nm xeon "Dunnington" */
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              intel_pmu_lbr_init_core();
+ +
                 x86_pmu.event_constraints = intel_core2_event_constraints;
                 pr_cont("Core2 events, ");
                 break;
   
         case 26: /* 45 nm nehalem, "Bloomfield" */
         case 30: /* 45 nm nehalem, "Lynnfield" */
+       case 46: /* 45 nm nehalem-ex, "Beckton" */
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              intel_pmu_lbr_init_nhm();
+ +
                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
- -              pr_cont("Nehalem/Corei7 events, ");
+ +              x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ +              pr_cont("Nehalem events, ");
                 break;
+ +
         case 28: /* Atom */
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              intel_pmu_lbr_init_atom();
+ +
                 x86_pmu.event_constraints = intel_gen_event_constraints;
                 pr_cont("Atom events, ");
                 break;
@@@ -978,10 -956,7 +979,10 @@@
                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              intel_pmu_lbr_init_nhm();
+ +
                 x86_pmu.event_constraints = intel_westmere_event_constraints;
+ +              x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                 pr_cont("Westmere events, ");
                 break;
   
diff --combined arch/x86/kernel/cpu/perf_event_intel_ds.c

index 2fea3622af7fde7264d4cdce883ef8c3aadd9bf7,0000000000000000000000000000000000000000..ec8b2e12e10452b2a67ffd88bb02067672c285e4

mode 100644,000000..100644
--- 1/arch/x86/kernel/cpu/perf_event_intel_ds.c
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@@ -1,664 -1,0 +1,664 @@@
-               for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+ +#ifdef CONFIG_CPU_SUP_INTEL
+ +
+ +/* The maximal number of PEBS events: */
+ +#define MAX_PEBS_EVENTS               4
+ +
+ +/* The size of a BTS record in bytes: */
+ +#define BTS_RECORD_SIZE               24
+ +
+ +#define BTS_BUFFER_SIZE               (PAGE_SIZE << 4)
+ +#define PEBS_BUFFER_SIZE      PAGE_SIZE
+ +
+ +/*
+ + * pebs_record_32 for p4 and core not supported
+ +
+ +struct pebs_record_32 {
+ +      u32 flags, ip;
+ +      u32 ax, bx, cx, dx;
+ +      u32 si, di, bp, sp;
+ +};
+ +
+ + */
+ +
+ +struct pebs_record_core {
+ +      u64 flags, ip;
+ +      u64 ax, bx, cx, dx;
+ +      u64 si, di, bp, sp;
+ +      u64 r8,  r9,  r10, r11;
+ +      u64 r12, r13, r14, r15;
+ +};
+ +
+ +struct pebs_record_nhm {
+ +      u64 flags, ip;
+ +      u64 ax, bx, cx, dx;
+ +      u64 si, di, bp, sp;
+ +      u64 r8,  r9,  r10, r11;
+ +      u64 r12, r13, r14, r15;
+ +      u64 status, dla, dse, lat;
+ +};
+ +
+ +/*
+ + * A debug store configuration.
+ + *
+ + * We only support architectures that use 64bit fields.
+ + */
+ +struct debug_store {
+ +      u64     bts_buffer_base;
+ +      u64     bts_index;
+ +      u64     bts_absolute_maximum;
+ +      u64     bts_interrupt_threshold;
+ +      u64     pebs_buffer_base;
+ +      u64     pebs_index;
+ +      u64     pebs_absolute_maximum;
+ +      u64     pebs_interrupt_threshold;
+ +      u64     pebs_event_reset[MAX_PEBS_EVENTS];
+ +};
+ +
+ +static void init_debug_store_on_cpu(int cpu)
+ +{
+ +      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ +
+ +      if (!ds)
+ +              return;
+ +
+ +      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+ +                   (u32)((u64)(unsigned long)ds),
+ +                   (u32)((u64)(unsigned long)ds >> 32));
+ +}
+ +
+ +static void fini_debug_store_on_cpu(int cpu)
+ +{
+ +      if (!per_cpu(cpu_hw_events, cpu).ds)
+ +              return;
+ +
+ +      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+ +}
+ +
+ +static void release_ds_buffers(void)
+ +{
+ +      int cpu;
+ +
+ +      if (!x86_pmu.bts && !x86_pmu.pebs)
+ +              return;
+ +
+ +      get_online_cpus();
+ +
+ +      for_each_online_cpu(cpu)
+ +              fini_debug_store_on_cpu(cpu);
+ +
+ +      for_each_possible_cpu(cpu) {
+ +              struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ +
+ +              if (!ds)
+ +                      continue;
+ +
+ +              per_cpu(cpu_hw_events, cpu).ds = NULL;
+ +
+ +              kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ +              kfree((void *)(unsigned long)ds->bts_buffer_base);
+ +              kfree(ds);
+ +      }
+ +
+ +      put_online_cpus();
+ +}
+ +
+ +static int reserve_ds_buffers(void)
+ +{
+ +      int cpu, err = 0;
+ +
+ +      if (!x86_pmu.bts && !x86_pmu.pebs)
+ +              return 0;
+ +
+ +      get_online_cpus();
+ +
+ +      for_each_possible_cpu(cpu) {
+ +              struct debug_store *ds;
+ +              void *buffer;
+ +              int max, thresh;
+ +
+ +              err = -ENOMEM;
+ +              ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+ +              if (unlikely(!ds))
+ +                      break;
+ +              per_cpu(cpu_hw_events, cpu).ds = ds;
+ +
+ +              if (x86_pmu.bts) {
+ +                      buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+ +                      if (unlikely(!buffer))
+ +                              break;
+ +
+ +                      max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ +                      thresh = max / 16;
+ +
+ +                      ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ +                      ds->bts_index = ds->bts_buffer_base;
+ +                      ds->bts_absolute_maximum = ds->bts_buffer_base +
+ +                              max * BTS_RECORD_SIZE;
+ +                      ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ +                              thresh * BTS_RECORD_SIZE;
+ +              }
+ +
+ +              if (x86_pmu.pebs) {
+ +                      buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
+ +                      if (unlikely(!buffer))
+ +                              break;
+ +
+ +                      max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+ +
+ +                      ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ +                      ds->pebs_index = ds->pebs_buffer_base;
+ +                      ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+ +                              max * x86_pmu.pebs_record_size;
+ +                      /*
+ +                       * Always use single record PEBS
+ +                       */
+ +                      ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+ +                              x86_pmu.pebs_record_size;
+ +              }
+ +
+ +              err = 0;
+ +      }
+ +
+ +      if (err)
+ +              release_ds_buffers();
+ +      else {
+ +              for_each_online_cpu(cpu)
+ +                      init_debug_store_on_cpu(cpu);
+ +      }
+ +
+ +      put_online_cpus();
+ +
+ +      return err;
+ +}
+ +
+ +/*
+ + * BTS
+ + */
+ +
+ +static struct event_constraint bts_constraint =
+ +      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+ +
+ +static void intel_pmu_enable_bts(u64 config)
+ +{
+ +      unsigned long debugctlmsr;
+ +
+ +      debugctlmsr = get_debugctlmsr();
+ +
+ +      debugctlmsr |= DEBUGCTLMSR_TR;
+ +      debugctlmsr |= DEBUGCTLMSR_BTS;
+ +      debugctlmsr |= DEBUGCTLMSR_BTINT;
+ +
+ +      if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+ +              debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
+ +
+ +      if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+ +              debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
+ +
+ +      update_debugctlmsr(debugctlmsr);
+ +}
+ +
+ +static void intel_pmu_disable_bts(void)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      unsigned long debugctlmsr;
+ +
+ +      if (!cpuc->ds)
+ +              return;
+ +
+ +      debugctlmsr = get_debugctlmsr();
+ +
+ +      debugctlmsr &=
+ +              ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
+ +                DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
+ +
+ +      update_debugctlmsr(debugctlmsr);
+ +}
+ +
+ +static void intel_pmu_drain_bts_buffer(void)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct debug_store *ds = cpuc->ds;
+ +      struct bts_record {
+ +              u64     from;
+ +              u64     to;
+ +              u64     flags;
+ +      };
+ +      struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+ +      struct bts_record *at, *top;
+ +      struct perf_output_handle handle;
+ +      struct perf_event_header header;
+ +      struct perf_sample_data data;
+ +      struct pt_regs regs;
+ +
+ +      if (!event)
+ +              return;
+ +
+ +      if (!ds)
+ +              return;
+ +
+ +      at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+ +      top = (struct bts_record *)(unsigned long)ds->bts_index;
+ +
+ +      if (top <= at)
+ +              return;
+ +
+ +      ds->bts_index = ds->bts_buffer_base;
+ +
+ +      perf_sample_data_init(&data, 0);
+ +      data.period = event->hw.last_period;
+ +      regs.ip     = 0;
+ +
+ +      /*
+ +       * Prepare a generic sample, i.e. fill in the invariant fields.
+ +       * We will overwrite the from and to address before we output
+ +       * the sample.
+ +       */
+ +      perf_prepare_sample(&header, &data, event, &regs);
+ +
+ +      if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
+ +              return;
+ +
+ +      for (; at < top; at++) {
+ +              data.ip         = at->from;
+ +              data.addr       = at->to;
+ +
+ +              perf_output_sample(&handle, &header, &data, event);
+ +      }
+ +
+ +      perf_output_end(&handle);
+ +
+ +      /* There's new data available. */
+ +      event->hw.interrupts++;
+ +      event->pending_kill = POLL_IN;
+ +}
+ +
+ +/*
+ + * PEBS
+ + */
+ +
+ +static struct event_constraint intel_core_pebs_events[] = {
+ +      PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+ +      PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
+ +      EVENT_CONSTRAINT_END
+ +};
+ +
+ +static struct event_constraint intel_nehalem_pebs_events[] = {
+ +      PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
+ +      PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
+ +      PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
+ +      PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
+ +      EVENT_CONSTRAINT_END
+ +};
+ +
+ +static struct event_constraint *
+ +intel_pebs_constraints(struct perf_event *event)
+ +{
+ +      struct event_constraint *c;
+ +
+ +      if (!event->attr.precise)
+ +              return NULL;
+ +
+ +      if (x86_pmu.pebs_constraints) {
+ +              for_each_event_constraint(c, x86_pmu.pebs_constraints) {
+ +                      if ((event->hw.config & c->cmask) == c->code)
+ +                              return c;
+ +              }
+ +      }
+ +
+ +      return &emptyconstraint;
+ +}
+ +
+ +static void intel_pmu_pebs_enable(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct hw_perf_event *hwc = &event->hw;
+ +
+ +      hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+ +
+ +      cpuc->pebs_enabled |= 1ULL << hwc->idx;
+ +      WARN_ON_ONCE(cpuc->enabled);
+ +
+ +      if (x86_pmu.intel_cap.pebs_trap)
+ +              intel_pmu_lbr_enable(event);
+ +}
+ +
+ +static void intel_pmu_pebs_disable(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct hw_perf_event *hwc = &event->hw;
+ +
+ +      cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+ +      if (cpuc->enabled)
+ +              wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ +
+ +      hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+ +
+ +      if (x86_pmu.intel_cap.pebs_trap)
+ +              intel_pmu_lbr_disable(event);
+ +}
+ +
+ +static void intel_pmu_pebs_enable_all(void)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +
+ +      if (cpuc->pebs_enabled)
+ +              wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ +}
+ +
+ +static void intel_pmu_pebs_disable_all(void)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +
+ +      if (cpuc->pebs_enabled)
+ +              wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+ +}
+ +
+ +#include <asm/insn.h>
+ +
+ +static inline bool kernel_ip(unsigned long ip)
+ +{
+ +#ifdef CONFIG_X86_32
+ +      return ip > PAGE_OFFSET;
+ +#else
+ +      return (long)ip < 0;
+ +#endif
+ +}
+ +
+ +static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      unsigned long from = cpuc->lbr_entries[0].from;
+ +      unsigned long old_to, to = cpuc->lbr_entries[0].to;
+ +      unsigned long ip = regs->ip;
+ +
+ +      /*
+ +       * We don't need to fix up the IP if the PEBS assist is fault-like
+ +       */
+ +      if (!x86_pmu.intel_cap.pebs_trap)
+ +              return 1;
+ +
+ +      /*
+ +       * No LBR entry, no basic block, no rewinding
+ +       */
+ +      if (!cpuc->lbr_stack.nr || !from || !to)
+ +              return 0;
+ +
+ +      /*
+ +       * Basic blocks should never cross user/kernel boundaries
+ +       */
+ +      if (kernel_ip(ip) != kernel_ip(to))
+ +              return 0;
+ +
+ +      /*
+ +       * unsigned math, either ip is before the start (impossible) or
+ +       * the basic block is larger than 1 page (sanity)
+ +       */
+ +      if ((ip - to) > PAGE_SIZE)
+ +              return 0;
+ +
+ +      /*
+ +       * We sampled a branch insn, rewind using the LBR stack
+ +       */
+ +      if (ip == to) {
+ +              regs->ip = from;
+ +              return 1;
+ +      }
+ +
+ +      do {
+ +              struct insn insn;
+ +              u8 buf[MAX_INSN_SIZE];
+ +              void *kaddr;
+ +
+ +              old_to = to;
+ +              if (!kernel_ip(ip)) {
+ +                      int bytes, size = MAX_INSN_SIZE;
+ +
+ +                      bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+ +                      if (bytes != size)
+ +                              return 0;
+ +
+ +                      kaddr = buf;
+ +              } else
+ +                      kaddr = (void *)to;
+ +
+ +              kernel_insn_init(&insn, kaddr);
+ +              insn_get_length(&insn);
+ +              to += insn.length;
+ +      } while (to < ip);
+ +
+ +      if (to == ip) {
+ +              regs->ip = old_to;
+ +              return 1;
+ +      }
+ +
+ +      /*
+ +       * Even though we decoded the basic block, the instruction stream
+ +       * never matched the given IP, either the TO or the IP got corrupted.
+ +       */
+ +      return 0;
+ +}
+ +
+ +static int intel_pmu_save_and_restart(struct perf_event *event);
+ +
+ +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct debug_store *ds = cpuc->ds;
+ +      struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+ +      struct pebs_record_core *at, *top;
+ +      struct perf_sample_data data;
+ +      struct perf_raw_record raw;
+ +      struct pt_regs regs;
+ +      int n;
+ +
+ +      if (!ds || !x86_pmu.pebs)
+ +              return;
+ +
+ +      at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+ +      top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+ +
+ +      /*
+ +       * Whatever else happens, drain the thing
+ +       */
+ +      ds->pebs_index = ds->pebs_buffer_base;
+ +
+ +      if (!test_bit(0, cpuc->active_mask))
+ +              return;
+ +
+ +      WARN_ON_ONCE(!event);
+ +
+ +      if (!event->attr.precise)
+ +              return;
+ +
+ +      n = top - at;
+ +      if (n <= 0)
+ +              return;
+ +
+ +      if (!intel_pmu_save_and_restart(event))
+ +              return;
+ +
+ +      /*
+ +       * Should not happen, we program the threshold at 1 and do not
+ +       * set a reset value.
+ +       */
+ +      WARN_ON_ONCE(n > 1);
+ +      at += n - 1;
+ +
+ +      perf_sample_data_init(&data, 0);
+ +      data.period = event->hw.last_period;
+ +
+ +      if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ +              raw.size = x86_pmu.pebs_record_size;
+ +              raw.data = at;
+ +              data.raw = &raw;
+ +      }
+ +
+ +      /*
+ +       * We use the interrupt regs as a base because the PEBS record
+ +       * does not contain a full regs set, specifically it seems to
+ +       * lack segment descriptors, which get used by things like
+ +       * user_mode().
+ +       *
+ +       * In the simple case fix up only the IP and BP,SP regs, for
+ +       * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
+ +       * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+ +       */
+ +      regs = *iregs;
+ +      regs.ip = at->ip;
+ +      regs.bp = at->bp;
+ +      regs.sp = at->sp;
+ +
+ +      if (intel_pmu_pebs_fixup_ip(&regs))
+ +              regs.flags |= PERF_EFLAGS_EXACT;
+ +      else
+ +              regs.flags &= ~PERF_EFLAGS_EXACT;
+ +
+ +      if (perf_event_overflow(event, 1, &data, &regs))
+ +              x86_pmu_stop(event);
+ +}
+ +
+ +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct debug_store *ds = cpuc->ds;
+ +      struct pebs_record_nhm *at, *top;
+ +      struct perf_sample_data data;
+ +      struct perf_event *event = NULL;
+ +      struct perf_raw_record raw;
+ +      struct pt_regs regs;
+ +      u64 status = 0;
+ +      int bit, n;
+ +
+ +      if (!ds || !x86_pmu.pebs)
+ +              return;
+ +
+ +      at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ +      top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+ +
+ +      ds->pebs_index = ds->pebs_buffer_base;
+ +
+ +      n = top - at;
+ +      if (n <= 0)
+ +              return;
+ +
+ +      /*
+ +       * Should not happen, we program the threshold at 1 and do not
+ +       * set a reset value.
+ +       */
+ +      WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+ +
+ +      for ( ; at < top; at++) {
++              for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+ +                      event = cpuc->events[bit];
+ +                      if (!test_bit(bit, cpuc->active_mask))
+ +                              continue;
+ +
+ +                      WARN_ON_ONCE(!event);
+ +
+ +                      if (!event->attr.precise)
+ +                              continue;
+ +
+ +                      if (__test_and_set_bit(bit, (unsigned long *)&status))
+ +                              continue;
+ +
+ +                      break;
+ +              }
+ +
+ +              if (!event || bit >= MAX_PEBS_EVENTS)
+ +                      continue;
+ +
+ +              if (!intel_pmu_save_and_restart(event))
+ +                      continue;
+ +
+ +              perf_sample_data_init(&data, 0);
+ +              data.period = event->hw.last_period;
+ +
+ +              if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ +                      raw.size = x86_pmu.pebs_record_size;
+ +                      raw.data = at;
+ +                      data.raw = &raw;
+ +              }
+ +
+ +              /*
+ +               * See the comment in intel_pmu_drain_pebs_core()
+ +               */
+ +              regs = *iregs;
+ +              regs.ip = at->ip;
+ +              regs.bp = at->bp;
+ +              regs.sp = at->sp;
+ +
+ +              if (intel_pmu_pebs_fixup_ip(&regs))
+ +                      regs.flags |= PERF_EFLAGS_EXACT;
+ +              else
+ +                      regs.flags &= ~PERF_EFLAGS_EXACT;
+ +
+ +              if (perf_event_overflow(event, 1, &data, &regs))
+ +                      x86_pmu_stop(event);
+ +      }
+ +}
+ +
+ +/*
+ + * BTS, PEBS probe and setup
+ + */
+ +
+ +static void intel_ds_init(void)
+ +{
+ +      /*
+ +       * No support for 32bit formats
+ +       */
+ +      if (!boot_cpu_has(X86_FEATURE_DTES64))
+ +              return;
+ +
+ +      x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
+ +      x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ +      if (x86_pmu.pebs) {
+ +              char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
+ +              int format = x86_pmu.intel_cap.pebs_format;
+ +
+ +              switch (format) {
+ +              case 0:
+ +                      printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
+ +                      x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+ +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
+ +                      x86_pmu.pebs_constraints = intel_core_pebs_events;
+ +                      break;
+ +
+ +              case 1:
+ +                      printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
+ +                      x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
+ +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+ +                      x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
+ +                      break;
+ +
+ +              default:
+ +                      printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
+ +                      x86_pmu.pebs = 0;
+ +                      break;
+ +              }
+ +      }
+ +}
+ +
+ +#else /* CONFIG_CPU_SUP_INTEL */
+ +
+ +static int reserve_ds_buffers(void)
+ +{
+ +      return 0;
+ +}
+ +
+ +static void release_ds_buffers(void)
+ +{
+ +}
+ +
+ +#endif /* CONFIG_CPU_SUP_INTEL */
diff --combined arch/x86/kernel/ptrace.c

index f2fd3b80e565f555a14df09230d2a401b8ca8bed,2e9b55027b7e10e5a8ee31dc270ed7497439384c..055be0afd3305788c6fa529eb17cdfa1958a0a80
--- 1/arch/x86/kernel/ptrace.c
--- 2/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@@ -2,6 -2,9 +2,6 @@@
   /*
    * Pentium III FXSR, SSE support
    *    Gareth Hughes <[email protected]>, May 2000
- - *
- - * BTS tracing
- - *    Markus Metzger <[email protected]>, Dec 2007
    */
   
   #include <linux/kernel.h>
@@@ -9,6 -12,7 +9,7 @@@
   #include <linux/mm.h>
   #include <linux/smp.h>
   #include <linux/errno.h>
+ #include <linux/slab.h>
   #include <linux/ptrace.h>
   #include <linux/regset.h>
   #include <linux/tracehook.h>
@@@ -18,6 -22,7 +19,6 @@@
   #include <linux/audit.h>
   #include <linux/seccomp.h>
   #include <linux/signal.h>
- -#include <linux/workqueue.h>
   #include <linux/perf_event.h>
   #include <linux/hw_breakpoint.h>
   
@@@ -31,6 -36,7 +32,6 @@@
   #include <asm/desc.h>
   #include <asm/prctl.h>
   #include <asm/proto.h>
- -#include <asm/ds.h>
   #include <asm/hw_breakpoint.h>
   
   #include "tls.h"
@@@ -783,6 -789,342 +784,6 @@@ static int ioperm_get(struct task_struc
                                    0, IO_BITMAP_BYTES);
   }
   
- -#ifdef CONFIG_X86_PTRACE_BTS
- -/*
- - * A branch trace store context.
- - *
- - * Contexts may only be installed by ptrace_bts_config() and only for
- - * ptraced tasks.
- - *
- - * Contexts are destroyed when the tracee is detached from the tracer.
- - * The actual destruction work requires interrupts enabled, so the
- - * work is deferred and will be scheduled during __ptrace_unlink().
- - *
- - * Contexts hold an additional task_struct reference on the traced
- - * task, as well as a reference on the tracer's mm.
- - *
- - * Ptrace already holds a task_struct for the duration of ptrace operations,
- - * but since destruction is deferred, it may be executed after both
- - * tracer and tracee exited.
- - */
- -struct bts_context {
- -      /* The branch trace handle. */
- -      struct bts_tracer       *tracer;
- -
- -      /* The buffer used to store the branch trace and its size. */
- -      void                    *buffer;
- -      unsigned int            size;
- -
- -      /* The mm that paid for the above buffer. */
- -      struct mm_struct        *mm;
- -
- -      /* The task this context belongs to. */
- -      struct task_struct      *task;
- -
- -      /* The signal to send on a bts buffer overflow. */
- -      unsigned int            bts_ovfl_signal;
- -
- -      /* The work struct to destroy a context. */
- -      struct work_struct      work;
- -};
- -
- -static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
- -{
- -      void *buffer = NULL;
- -      int err = -ENOMEM;
- -
- -      err = account_locked_memory(current->mm, current->signal->rlim, size);
- -      if (err < 0)
- -              return err;
- -
- -      buffer = kzalloc(size, GFP_KERNEL);
- -      if (!buffer)
- -              goto out_refund;
- -
- -      context->buffer = buffer;
- -      context->size = size;
- -      context->mm = get_task_mm(current);
- -
- -      return 0;
- -
- - out_refund:
- -      refund_locked_memory(current->mm, size);
- -      return err;
- -}
- -
- -static inline void free_bts_buffer(struct bts_context *context)
- -{
- -      if (!context->buffer)
- -              return;
- -
- -      kfree(context->buffer);
- -      context->buffer = NULL;
- -
- -      refund_locked_memory(context->mm, context->size);
- -      context->size = 0;
- -
- -      mmput(context->mm);
- -      context->mm = NULL;
- -}
- -
- -static void free_bts_context_work(struct work_struct *w)
- -{
- -      struct bts_context *context;
- -
- -      context = container_of(w, struct bts_context, work);
- -
- -      ds_release_bts(context->tracer);
- -      put_task_struct(context->task);
- -      free_bts_buffer(context);
- -      kfree(context);
- -}
- -
- -static inline void free_bts_context(struct bts_context *context)
- -{
- -      INIT_WORK(&context->work, free_bts_context_work);
- -      schedule_work(&context->work);
- -}
- -
- -static inline struct bts_context *alloc_bts_context(struct task_struct *task)
- -{
- -      struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
- -      if (context) {
- -              context->task = task;
- -              task->bts = context;
- -
- -              get_task_struct(task);
- -      }
- -
- -      return context;
- -}
- -
- -static int ptrace_bts_read_record(struct task_struct *child, size_t index,
- -                                struct bts_struct __user *out)
- -{
- -      struct bts_context *context;
- -      const struct bts_trace *trace;
- -      struct bts_struct bts;
- -      const unsigned char *at;
- -      int error;
- -
- -      context = child->bts;
- -      if (!context)
- -              return -ESRCH;
- -
- -      trace = ds_read_bts(context->tracer);
- -      if (!trace)
- -              return -ESRCH;
- -
- -      at = trace->ds.top - ((index + 1) * trace->ds.size);
- -      if ((void *)at < trace->ds.begin)
- -              at += (trace->ds.n * trace->ds.size);
- -
- -      if (!trace->read)
- -              return -EOPNOTSUPP;
- -
- -      error = trace->read(context->tracer, at, &bts);
- -      if (error < 0)
- -              return error;
- -
- -      if (copy_to_user(out, &bts, sizeof(bts)))
- -              return -EFAULT;
- -
- -      return sizeof(bts);
- -}
- -
- -static int ptrace_bts_drain(struct task_struct *child,
- -                          long size,
- -                          struct bts_struct __user *out)
- -{
- -      struct bts_context *context;
- -      const struct bts_trace *trace;
- -      const unsigned char *at;
- -      int error, drained = 0;
- -
- -      context = child->bts;
- -      if (!context)
- -              return -ESRCH;
- -
- -      trace = ds_read_bts(context->tracer);
- -      if (!trace)
- -              return -ESRCH;
- -
- -      if (!trace->read)
- -              return -EOPNOTSUPP;
- -
- -      if (size < (trace->ds.top - trace->ds.begin))
- -              return -EIO;
- -
- -      for (at = trace->ds.begin; (void *)at < trace->ds.top;
- -           out++, drained++, at += trace->ds.size) {
- -              struct bts_struct bts;
- -
- -              error = trace->read(context->tracer, at, &bts);
- -              if (error < 0)
- -                      return error;
- -
- -              if (copy_to_user(out, &bts, sizeof(bts)))
- -                      return -EFAULT;
- -      }
- -
- -      memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
- -
- -      error = ds_reset_bts(context->tracer);
- -      if (error < 0)
- -              return error;
- -
- -      return drained;
- -}
- -
- -static int ptrace_bts_config(struct task_struct *child,
- -                           long cfg_size,
- -                           const struct ptrace_bts_config __user *ucfg)
- -{
- -      struct bts_context *context;
- -      struct ptrace_bts_config cfg;
- -      unsigned int flags = 0;
- -
- -      if (cfg_size < sizeof(cfg))
- -              return -EIO;
- -
- -      if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
- -              return -EFAULT;
- -
- -      context = child->bts;
- -      if (!context)
- -              context = alloc_bts_context(child);
- -      if (!context)
- -              return -ENOMEM;
- -
- -      if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
- -              if (!cfg.signal)
- -                      return -EINVAL;
- -
- -              return -EOPNOTSUPP;
- -              context->bts_ovfl_signal = cfg.signal;
- -      }
- -
- -      ds_release_bts(context->tracer);
- -      context->tracer = NULL;
- -
- -      if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
- -              int err;
- -
- -              free_bts_buffer(context);
- -              if (!cfg.size)
- -                      return 0;
- -
- -              err = alloc_bts_buffer(context, cfg.size);
- -              if (err < 0)
- -                      return err;
- -      }
- -
- -      if (cfg.flags & PTRACE_BTS_O_TRACE)
- -              flags |= BTS_USER;
- -
- -      if (cfg.flags & PTRACE_BTS_O_SCHED)
- -              flags |= BTS_TIMESTAMPS;
- -
- -      context->tracer =
- -              ds_request_bts_task(child, context->buffer, context->size,
- -                                  NULL, (size_t)-1, flags);
- -      if (unlikely(IS_ERR(context->tracer))) {
- -              int error = PTR_ERR(context->tracer);
- -
- -              free_bts_buffer(context);
- -              context->tracer = NULL;
- -              return error;
- -      }
- -
- -      return sizeof(cfg);
- -}
- -
- -static int ptrace_bts_status(struct task_struct *child,
- -                           long cfg_size,
- -                           struct ptrace_bts_config __user *ucfg)
- -{
- -      struct bts_context *context;
- -      const struct bts_trace *trace;
- -      struct ptrace_bts_config cfg;
- -
- -      context = child->bts;
- -      if (!context)
- -              return -ESRCH;
- -
- -      if (cfg_size < sizeof(cfg))
- -              return -EIO;
- -
- -      trace = ds_read_bts(context->tracer);
- -      if (!trace)
- -              return -ESRCH;
- -
- -      memset(&cfg, 0, sizeof(cfg));
- -      cfg.size        = trace->ds.end - trace->ds.begin;
- -      cfg.signal      = context->bts_ovfl_signal;
- -      cfg.bts_size    = sizeof(struct bts_struct);
- -
- -      if (cfg.signal)
- -              cfg.flags |= PTRACE_BTS_O_SIGNAL;
- -
- -      if (trace->ds.flags & BTS_USER)
- -              cfg.flags |= PTRACE_BTS_O_TRACE;
- -
- -      if (trace->ds.flags & BTS_TIMESTAMPS)
- -              cfg.flags |= PTRACE_BTS_O_SCHED;
- -
- -      if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
- -              return -EFAULT;
- -
- -      return sizeof(cfg);
- -}
- -
- -static int ptrace_bts_clear(struct task_struct *child)
- -{
- -      struct bts_context *context;
- -      const struct bts_trace *trace;
- -
- -      context = child->bts;
- -      if (!context)
- -              return -ESRCH;
- -
- -      trace = ds_read_bts(context->tracer);
- -      if (!trace)
- -              return -ESRCH;
- -
- -      memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
- -
- -      return ds_reset_bts(context->tracer);
- -}
- -
- -static int ptrace_bts_size(struct task_struct *child)
- -{
- -      struct bts_context *context;
- -      const struct bts_trace *trace;
- -
- -      context = child->bts;
- -      if (!context)
- -              return -ESRCH;
- -
- -      trace = ds_read_bts(context->tracer);
- -      if (!trace)
- -              return -ESRCH;
- -
- -      return (trace->ds.top - trace->ds.begin) / trace->ds.size;
- -}
- -
- -/*
- - * Called from __ptrace_unlink() after the child has been moved back
- - * to its original parent.
- - */
- -void ptrace_bts_untrace(struct task_struct *child)
- -{
- -      if (unlikely(child->bts)) {
- -              free_bts_context(child->bts);
- -              child->bts = NULL;
- -      }
- -}
- -#endif /* CONFIG_X86_PTRACE_BTS */
- -
   /*
    * Called by kernel/ptrace.c when detaching..
    *
@@@ -910,6 -1252,39 +911,6 @@@ long arch_ptrace(struct task_struct *ch
                 break;
   #endif
   
- -      /*
- -       * These bits need more cooking - not enabled yet:
- -       */
- -#ifdef CONFIG_X86_PTRACE_BTS
- -      case PTRACE_BTS_CONFIG:
- -              ret = ptrace_bts_config
- -                      (child, data, (struct ptrace_bts_config __user *)addr);
- -              break;
- -
- -      case PTRACE_BTS_STATUS:
- -              ret = ptrace_bts_status
- -                      (child, data, (struct ptrace_bts_config __user *)addr);
- -              break;
- -
- -      case PTRACE_BTS_SIZE:
- -              ret = ptrace_bts_size(child);
- -              break;
- -
- -      case PTRACE_BTS_GET:
- -              ret = ptrace_bts_read_record
- -                      (child, data, (struct bts_struct __user *) addr);
- -              break;
- -
- -      case PTRACE_BTS_CLEAR:
- -              ret = ptrace_bts_clear(child);
- -              break;
- -
- -      case PTRACE_BTS_DRAIN:
- -              ret = ptrace_bts_drain
- -                      (child, data, (struct bts_struct __user *) addr);
- -              break;
- -#endif /* CONFIG_X86_PTRACE_BTS */
- -
         default:
                 ret = ptrace_request(child, request, addr, data);
                 break;
@@@ -1169,6 -1544,14 +1170,6 @@@ long compat_arch_ptrace(struct task_str
   
         case PTRACE_GET_THREAD_AREA:
         case PTRACE_SET_THREAD_AREA:
- -#ifdef CONFIG_X86_PTRACE_BTS
- -      case PTRACE_BTS_CONFIG:
- -      case PTRACE_BTS_STATUS:
- -      case PTRACE_BTS_SIZE:
- -      case PTRACE_BTS_GET:
- -      case PTRACE_BTS_CLEAR:
- -      case PTRACE_BTS_DRAIN:
- -#endif /* CONFIG_X86_PTRACE_BTS */
                 return arch_ptrace(child, request, addr, data);
   
         default:
diff --combined include/linux/mm.h

index c8442b6551114ec9bce7037ff0487dc305404096,462acaf36f3a3959cca19e2f3c2e66cc01fe5379..fb19bb92b809d81564cf03af26f5a568f2994629
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -19,6 -19,7 +19,6 @@@ struct anon_vma
   struct file_ra_state;
   struct user_struct;
   struct writeback_control;
- -struct rlimit;
   
   #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
   extern unsigned long max_mapnr;
@@@ -782,8 -783,8 +782,8 @@@ struct mm_walk 
         int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
         int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
         int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
-       int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long,
-                            struct mm_walk *);
+       int (*hugetlb_entry)(pte_t *, unsigned long,
+                            unsigned long, unsigned long, struct mm_walk *);
         struct mm_struct *mm;
         void *private;
   };
@@@ -1448,6 -1449,9 +1448,6 @@@ int vmemmap_populate_basepages(struct p
   int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
   void vmemmap_populate_print_last(void);
   
- -extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
- -                               size_t size);
- -extern void refund_locked_memory(struct mm_struct *mm, size_t size);
   
   enum mf_flags {
         MF_COUNT_INCREASED = 1 << 0,
diff --combined kernel/fork.c

index d67f1dbfbe03fe94a05bb97798dc702cf9edee36,44b0791b0a2e378d1ad20e390cbfbec981e7fad8..5d3592deaf71cab3a6dc47acaa38317738e15fee
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -1052,6 -1052,9 +1052,9 @@@ static struct task_struct *copy_process
         p->prev_utime = cputime_zero;
         p->prev_stime = cputime_zero;
   #endif
+ #if defined(SPLIT_RSS_COUNTING)
+       memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+ #endif
   
         p->default_timer_slack_ns = current->timer_slack_ns;
   
@@@ -1108,6 -1111,9 +1111,6 @@@
         p->memcg_batch.do_batch = 0;
         p->memcg_batch.memcg = NULL;
   #endif
- -
- -      p->bts = NULL;
- -
         p->stack_start = stack_start;
   
         /* Perform scheduler related setup. Assign this task to a CPU. */
diff --combined kernel/perf_event.c

index 4aa50ff4efc04829c34215283f9a5134925d6c9e,2f3fbf84215a940cc40eccef9c8304964d10906f..fcf42dcd6089b119732601a037b786e909f2c866
--- 1/kernel/perf_event.c
--- 2/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@@ -15,6 -15,7 +15,7 @@@
   #include <linux/smp.h>
   #include <linux/file.h>
   #include <linux/poll.h>
+ #include <linux/slab.h>
   #include <linux/sysfs.h>
   #include <linux/dcache.h>
   #include <linux/percpu.h>
@@@ -1366,8 -1367,6 +1367,8 @@@ void perf_event_task_sched_in(struct ta
         if (cpuctx->task_ctx == ctx)
                 return;
   
+ +      perf_disable();
+ +
         /*
          * We want to keep the following priority order:
          * cpu pinned (that don't need to move), task pinned,
@@@ -1380,8 -1379,6 +1381,8 @@@
         ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
   
         cpuctx->task_ctx = ctx;
+ +
+ +      perf_enable();
   }
   
   #define MAX_INTERRUPTS (~0ULL)
@@@ -2645,7 -2642,6 +2646,7 @@@ static int perf_fasync(int fd, struct f
   }
   
   static const struct file_operations perf_fops = {
+ +      .llseek                 = no_llseek,
         .release                = perf_release,
         .read                   = perf_read,
         .poll                   = perf_poll,
diff --combined kernel/sched.c

index 1038ca163890eb4625c6751b7cbd611d521af3b6,a3dff1f3f9b0c32070d3ade21f3d2da63708d099..8cafe3ff558fec69a0c3c4676aa61191bb6e84da
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -71,6 -71,7 +71,7 @@@
   #include <linux/debugfs.h>
   #include <linux/ctype.h>
   #include <linux/ftrace.h>
+ #include <linux/slab.h>
   
   #include <asm/tlb.h>
   #include <asm/irq_regs.h>
@@@ -2076,6 -2077,49 +2077,6 @@@ migrate_task(struct task_struct *p, in
         return 1;
   }
   
- -/*
- - * wait_task_context_switch - wait for a thread to complete at least one
- - *                            context switch.
- - *
- - * @p must not be current.
- - */
- -void wait_task_context_switch(struct task_struct *p)
- -{
- -      unsigned long nvcsw, nivcsw, flags;
- -      int running;
- -      struct rq *rq;
- -
- -      nvcsw   = p->nvcsw;
- -      nivcsw  = p->nivcsw;
- -      for (;;) {
- -              /*
- -               * The runqueue is assigned before the actual context
- -               * switch. We need to take the runqueue lock.
- -               *
- -               * We could check initially without the lock but it is
- -               * very likely that we need to take the lock in every
- -               * iteration.
- -               */
- -              rq = task_rq_lock(p, &flags);
- -              running = task_running(rq, p);
- -              task_rq_unlock(rq, &flags);
- -
- -              if (likely(!running))
- -                      break;
- -              /*
- -               * The switch count is incremented before the actual
- -               * context switch. We thus wait for two switches to be
- -               * sure at least one completed.
- -               */
- -              if ((p->nvcsw - nvcsw) > 1)
- -                      break;
- -              if ((p->nivcsw - nivcsw) > 1)
- -                      break;
- -
- -              cpu_relax();
- -      }
- -}
- -
   /*
    * wait_task_inactive - wait for a thread to unschedule.
    *
@@@ -5344,7 -5388,7 +5345,7 @@@ int set_cpus_allowed_ptr(struct task_st
   
                 get_task_struct(mt);
                 task_rq_unlock(rq, &flags);
-               wake_up_process(rq->migration_thread);
+               wake_up_process(mt);
                 put_task_struct(mt);
                 wait_for_completion(&req.done);
                 tlb_migrate_finish(p->mm);
diff --combined kernel/trace/trace_selftest.c

index a7084e7c04270ad0116d6ef54e7184005fa0ab3f,81003b4d617fad5a966c2ed89fba1e5b47ddffdd..1cc9858258b33468627f0c9c787d8401480a152e
--- 1/kernel/trace/trace_selftest.c
--- 2/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@@ -3,6 -3,7 +3,7 @@@
   #include <linux/stringify.h>
   #include <linux/kthread.h>
   #include <linux/delay.h>
+ #include <linux/slab.h>
   
   static inline int trace_valid_entry(struct trace_entry *entry)
   {
@@@ -16,6 -17,7 +17,6 @@@
         case TRACE_BRANCH:
         case TRACE_GRAPH_ENT:
         case TRACE_GRAPH_RET:
- -      case TRACE_HW_BRANCHES:
         case TRACE_KSYM:
                 return 1;
         }
@@@ -753,6 -755,62 +754,6 @@@ trace_selftest_startup_branch(struct tr
   }
   #endif /* CONFIG_BRANCH_TRACER */
   
- -#ifdef CONFIG_HW_BRANCH_TRACER
- -int
- -trace_selftest_startup_hw_branches(struct tracer *trace,
- -                                 struct trace_array *tr)
- -{
- -      struct trace_iterator *iter;
- -      struct tracer tracer;
- -      unsigned long count;
- -      int ret;
- -
- -      if (!trace->open) {
- -              printk(KERN_CONT "missing open function...");
- -              return -1;
- -      }
- -
- -      ret = tracer_init(trace, tr);
- -      if (ret) {
- -              warn_failed_init_tracer(trace, ret);
- -              return ret;
- -      }
- -
- -      /*
- -       * The hw-branch tracer needs to collect the trace from the various
- -       * cpu trace buffers - before tracing is stopped.
- -       */
- -      iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- -      if (!iter)
- -              return -ENOMEM;
- -
- -      memcpy(&tracer, trace, sizeof(tracer));
- -
- -      iter->trace = &tracer;
- -      iter->tr = tr;
- -      iter->pos = -1;
- -      mutex_init(&iter->mutex);
- -
- -      trace->open(iter);
- -
- -      mutex_destroy(&iter->mutex);
- -      kfree(iter);
- -
- -      tracing_stop();
- -
- -      ret = trace_test_buffer(tr, &count);
- -      trace->reset(tr);
- -      tracing_start();
- -
- -      if (!ret && !count) {
- -              printk(KERN_CONT "no entries found..");
- -              ret = -1;
- -      }
- -
- -      return ret;
- -}
- -#endif /* CONFIG_HW_BRANCH_TRACER */
- -
   #ifdef CONFIG_KSYM_TRACER
   static int ksym_selftest_dummy;
author	Ingo Molnar <[email protected]>
	Thu, 8 Apr 2010 11:36:36 +0000 (13:36 +0200)
committer	Ingo Molnar <[email protected]>
	Thu, 8 Apr 2010 11:37:18 +0000 (13:37 +0200)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perf_event.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perf_event_intel.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perf_event_intel_ds.c	patch \|	diff1 \|	\|	blob \| history
arch/x86/kernel/ptrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/perf_event.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_selftest.c	patch \|	diff1 \|	diff2 \|	blob \| history