Merge branch 'perf/urgent' into perf/core
author    Ingo Molnar <[email protected]>
          Fri, 4 Oct 2013 07:59:13 +0000 (09:59 +0200)
committer Ingo Molnar <[email protected]>
          Fri, 4 Oct 2013 07:59:13 +0000 (09:59 +0200)
Pick up the latest fixes before applying new patches.

Signed-off-by: Ingo Molnar <[email protected]>
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c

index 353b7a3a2581808fe02660827c0286544c91fed7,f31a1655d1ff5bd602239e211b14bdd28d95a79a..36b5ab884c15662638bb93a837e05a2288c3ee04
@@@ -190,9 -190,9 +190,9 @@@ static struct extra_reg intel_snbep_ext
        EVENT_EXTRA_END
  };
  
 -EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 -EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
 -EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
 +EVENT_ATTR_STR(mem-loads,     mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
 +EVENT_ATTR_STR(mem-loads,     mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
 +EVENT_ATTR_STR(mem-stores,    mem_st_snb,     "event=0xcd,umask=0x2");
  
  struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
@@@ -1184,11 -1184,6 +1184,11 @@@ static void intel_pmu_disable_fixed(str
        wrmsrl(hwc->config_base, ctrl_val);
  }
  
 +static inline bool event_is_checkpointed(struct perf_event *event)
 +{
 +      return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
 +}
 +
  static void intel_pmu_disable_event(struct perf_event *event)
  {
        struct hw_perf_event *hwc = &event->hw;
  
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 +      cpuc->intel_cp_status &= ~(1ull << hwc->idx);
  
        /*
         * must disable before any actual event
@@@ -1277,9 -1271,6 +1277,9 @@@ static void intel_pmu_enable_event(stru
        if (event->attr.exclude_guest)
                cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
  
 +      if (unlikely(event_is_checkpointed(event)))
 +              cpuc->intel_cp_status |= (1ull << hwc->idx);
 +
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_enable_fixed(hwc);
                return;
  int intel_pmu_save_and_restart(struct perf_event *event)
  {
        x86_perf_event_update(event);
 +      /*
 +       * For a checkpointed counter always reset back to 0.  This
 +       * avoids a situation where the counter overflows, aborts the
 +       * transaction and is then set back to shortly before the
 +       * overflow, and overflows and aborts again.
 +       */
 +      if (unlikely(event_is_checkpointed(event))) {
 +              /* No race with NMIs because the counter should not be armed */
 +              wrmsrl(event->hw.event_base, 0);
 +              local64_set(&event->hw.prev_count, 0);
 +      }
        return x86_perf_event_set_period(event);
  }
  
@@@ -1392,13 -1372,6 +1392,13 @@@ again
                x86_pmu.drain_pebs(regs);
        }
  
 +      /*
 +       * Checkpointed counters can lead to 'spurious' PMIs because the
 +       * rollback caused by the PMI will have cleared the overflow status
 +       * bit. Therefore always force probe these counters.
 +       */
 +      status |= cpuc->intel_cp_status;
 +
        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];
  
@@@ -1864,20 -1837,6 +1864,20 @@@ static int hsw_hw_config(struct perf_ev
              event->attr.precise_ip > 0))
                return -EOPNOTSUPP;
  
 +      if (event_is_checkpointed(event)) {
 +              /*
 +               * Sampling of checkpointed events can cause situations where
 +               * the CPU constantly aborts because of an overflow, which is
 +               * then checkpointed back and ignored. Forbid checkpointing
 +               * for sampling.
 +               *
 +               * But still allow a long sampling period, so that perf stat
 +               * from KVM works.
 +               */
 +              if (event->attr.sample_period > 0 &&
 +                  event->attr.sample_period < 0x7fffffff)
 +                      return -EOPNOTSUPP;
 +      }
        return 0;
  }
  
@@@ -2223,36 -2182,10 +2223,36 @@@ static __init void intel_nehalem_quirk(
        }
  }
  
 -EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
 -EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
 +EVENT_ATTR_STR(mem-loads,     mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
 +EVENT_ATTR_STR(mem-stores,    mem_st_hsw,     "event=0xd0,umask=0x82")
 +
 +/* Haswell special events */
 +EVENT_ATTR_STR(tx-start,      tx_start,       "event=0xc9,umask=0x1");
 +EVENT_ATTR_STR(tx-commit,     tx_commit,      "event=0xc9,umask=0x2");
 +EVENT_ATTR_STR(tx-abort,      tx_abort,       "event=0xc9,umask=0x4");
 +EVENT_ATTR_STR(tx-capacity,   tx_capacity,    "event=0x54,umask=0x2");
 +EVENT_ATTR_STR(tx-conflict,   tx_conflict,    "event=0x54,umask=0x1");
 +EVENT_ATTR_STR(el-start,      el_start,       "event=0xc8,umask=0x1");
 +EVENT_ATTR_STR(el-commit,     el_commit,      "event=0xc8,umask=0x2");
 +EVENT_ATTR_STR(el-abort,      el_abort,       "event=0xc8,umask=0x4");
 +EVENT_ATTR_STR(el-capacity,   el_capacity,    "event=0x54,umask=0x2");
 +EVENT_ATTR_STR(el-conflict,   el_conflict,    "event=0x54,umask=0x1");
 +EVENT_ATTR_STR(cycles-t,      cycles_t,       "event=0x3c,in_tx=1");
 +EVENT_ATTR_STR(cycles-ct,     cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
  
  static struct attribute *hsw_events_attrs[] = {
 +      EVENT_PTR(tx_start),
 +      EVENT_PTR(tx_commit),
 +      EVENT_PTR(tx_abort),
 +      EVENT_PTR(tx_capacity),
 +      EVENT_PTR(tx_conflict),
 +      EVENT_PTR(el_start),
 +      EVENT_PTR(el_commit),
 +      EVENT_PTR(el_abort),
 +      EVENT_PTR(el_capacity),
 +      EVENT_PTR(el_conflict),
 +      EVENT_PTR(cycles_t),
 +      EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
        NULL
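/*
 * Illustrative userspace sketch (not part of this patch): counting the new
 * Haswell tx-start alias exported above (event=0xc9,umask=0x1) through a raw
 * perf_event_open() config.  Assumes the usual x86 raw encoding
 * (umask << 8 | event); real tooling would parse the sysfs alias string
 * instead of hard-coding the numbers.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x1c9;	/* umask=0x1 << 8 | event=0xc9 (tx-start) */
	attr.disabled = 1;

	/* Count for the calling thread on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run a transactional (RTM/HLE) workload here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("tx-start: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}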
@@@ -2392,6 -2325,7 +2392,7 @@@ __init int intel_pmu_init(void
                break;
  
        case 55: /* Atom 22nm "Silvermont" */
+       case 77: /* Avoton "Silvermont" */
                memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
arch/x86/kernel/cpu/perf_event_intel_ds.c
index 54ff6ce519c1d58f62b8f0498bd739b455c4f799,ab3ba1c1b7dd2c425dd5edf4c2b5091d76355b34..07d9a052ee72dfd17845731cdccacff1f72b778d
@@@ -182,28 -182,16 +182,28 @@@ struct pebs_record_nhm 
   * Same as pebs_record_nhm, with two additional fields.
   */
  struct pebs_record_hsw {
 -      struct pebs_record_nhm nhm;
 -      /*
 -       * Real IP of the event. In the Intel documentation this
 -       * is called eventingrip.
 -       */
 -      u64 real_ip;
 -      /*
 -       * TSX tuning information field: abort cycles and abort flags.
 -       */
 -      u64 tsx_tuning;
 +      u64 flags, ip;
 +      u64 ax, bx, cx, dx;
 +      u64 si, di, bp, sp;
 +      u64 r8,  r9,  r10, r11;
 +      u64 r12, r13, r14, r15;
 +      u64 status, dla, dse, lat;
 +      u64 real_ip, tsx_tuning;
 +};
 +
 +union hsw_tsx_tuning {
 +      struct {
 +              u32 cycles_last_block     : 32,
 +                  hle_abort             : 1,
 +                  rtm_abort             : 1,
 +                  instruction_abort     : 1,
 +                  non_instruction_abort : 1,
 +                  retry                 : 1,
 +                  data_conflict         : 1,
 +                  capacity_writes       : 1,
 +                  capacity_reads        : 1;
 +      };
 +      u64         value;
  };
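/*
 * Worked example (illustrative, not part of this patch): decoding a raw
 * tsx_tuning value with the layout above.  The union mirrors the kernel's
 * hsw_tsx_tuning and assumes GCC's little-endian bitfield ordering as used
 * on x86; the sample value is made up.
 */
#include <stdint.h>
#include <stdio.h>

union hsw_tsx_tuning_user {
	struct {
		uint32_t cycles_last_block     : 32,
		         hle_abort             : 1,
		         rtm_abort             : 1,
		         instruction_abort     : 1,
		         non_instruction_abort : 1,
		         retry                 : 1,
		         data_conflict         : 1,
		         capacity_writes       : 1,
		         capacity_reads        : 1;
	};
	uint64_t value;
};

int main(void)
{
	/* Hypothetical tsx_tuning field from a PEBS record. */
	union hsw_tsx_tuning_user tsx = { .value = 0x200000123ULL };

	/* Same extraction intel_hsw_weight() performs below: the low 32 bits
	 * carry the cycle count of the last transactional block. */
	printf("weight=%u rtm_abort=%u\n",
	       (unsigned)tsx.cycles_last_block, (unsigned)tsx.rtm_abort);
	return 0;
}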
  
  void init_debug_store_on_cpu(int cpu)
@@@ -596,6 -584,7 +596,7 @@@ struct event_constraint intel_snb_pebs_
        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
        EVENT_CONSTRAINT_END
  };
@@@ -797,24 -786,16 +798,24 @@@ static int intel_pmu_pebs_fixup_ip(stru
        return 0;
  }
  
 +static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
 +{
 +      if (pebs->tsx_tuning) {
 +              union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
 +              return tsx.cycles_last_block;
 +      }
 +      return 0;
 +}
 +
  static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs)
  {
        /*
 -       * We cast to pebs_record_nhm to get the load latency data
 -       * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 +       * We cast to the biggest pebs_record but are careful not to
 +       * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      struct pebs_record_nhm *pebs = __pebs;
 -      struct pebs_record_hsw *pebs_hsw = __pebs;
 +      struct pebs_record_hsw *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
        u64 sample_type;
        regs.sp = pebs->sp;
  
        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
 -              regs.ip = pebs_hsw->real_ip;
 +              regs.ip = pebs->real_ip;
                regs.flags |= PERF_EFLAGS_EXACT;
        } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
                regs.flags &= ~PERF_EFLAGS_EXACT;
  
        if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
 -              x86_pmu.intel_cap.pebs_format >= 1)
 +          x86_pmu.intel_cap.pebs_format >= 1)
                data.addr = pebs->dla;
  
 +      /* Only set the TSX weight when no memory weight was requested. */
 +      if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll &&
 +          (x86_pmu.intel_cap.pebs_format >= 2))
 +              data.weight = intel_hsw_weight(pebs);
 +
        if (has_branch_stack(event))
                data.br_stack = &cpuc->lbr_stack;
  
@@@ -937,34 -913,17 +938,34 @@@ static void intel_pmu_drain_pebs_core(s
        __intel_pmu_pebs_event(event, iregs, at);
  }
  
 -static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
 -                                      void *top)
 +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = NULL;
 +      void *at, *top;
        u64 status = 0;
        int bit;
  
 +      if (!x86_pmu.pebs_active)
 +              return;
 +
 +      at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 +      top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 +
        ds->pebs_index = ds->pebs_buffer_base;
  
 +      if (unlikely(at > top))
 +              return;
 +
 +      /*
 +       * Should not happen, we program the threshold at 1 and do not
 +       * set a reset value.
 +       */
 +      WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
 +                "Unexpected number of pebs records %ld\n",
 +                (long)(top - at) / x86_pmu.pebs_record_size);
 +
        for (; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
  
        }
  }
  
 -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 -{
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      struct debug_store *ds = cpuc->ds;
 -      struct pebs_record_nhm *at, *top;
 -      int n;
 -
 -      if (!x86_pmu.pebs_active)
 -              return;
 -
 -      at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 -      top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 -
 -      ds->pebs_index = ds->pebs_buffer_base;
 -
 -      n = top - at;
 -      if (n <= 0)
 -              return;
 -
 -      /*
 -       * Should not happen, we program the threshold at 1 and do not
 -       * set a reset value.
 -       */
 -      WARN_ONCE(n > x86_pmu.max_pebs_events,
 -                "Unexpected number of pebs records %d\n", n);
 -
 -      return __intel_pmu_drain_pebs_nhm(iregs, at, top);
 -}
 -
 -static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
 -{
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      struct debug_store *ds = cpuc->ds;
 -      struct pebs_record_hsw *at, *top;
 -      int n;
 -
 -      if (!x86_pmu.pebs_active)
 -              return;
 -
 -      at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
 -      top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
 -
 -      n = top - at;
 -      if (n <= 0)
 -              return;
 -      /*
 -       * Should not happen, we program the threshold at 1 and do not
 -       * set a reset value.
 -       */
 -      WARN_ONCE(n > x86_pmu.max_pebs_events,
 -                "Unexpected number of pebs records %d\n", n);
 -
 -      return __intel_pmu_drain_pebs_nhm(iregs, at, top);
 -}
 -
  /*
   * BTS, PEBS probe and setup
   */
@@@ -1026,7 -1040,7 +1027,7 @@@ void intel_ds_init(void
                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
 -                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
 +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;
  
                default: