/*
 * Copyright 2014 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Perf_events support for the Tile processor.
 *
 * This code is based upon the x86 perf event code, which is:
 *
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 *  Copyright (C) 2009 Google, Inc., Stephane Eranian
 */
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/stack.h>
#include <asm/pmc.h>
#include <hv/hypervisor.h>
#define TILE_MAX_COUNTERS	4

#define PERF_COUNT_0_IDX	0
#define PERF_COUNT_1_IDX	1
#define AUX_PERF_COUNT_0_IDX	2
#define AUX_PERF_COUNT_1_IDX	3
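
/*
 * Each tile has two base counters (SPR_PERF_COUNT_0/1) and two
 * auxiliary counters (SPR_AUX_PERF_COUNT_0/1); the indices above
 * select among them, with indices >= num_base_counters mapping to
 * the aux pair.
 */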
struct cpu_hw_events {
	int			n_events;
	struct perf_event	*events[TILE_MAX_COUNTERS]; /* counter order */
	struct perf_event	*event_list[TILE_MAX_COUNTERS]; /* enabled
								   order */
	int			assign[TILE_MAX_COUNTERS];
	unsigned long		active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)];
	unsigned long		used_mask;
};
/* TILE arch specific performance monitor unit */
struct tile_pmu {
	const char	*name;
	int		version;
	const int	*hw_events;	/* generic hw events table */
	/* generic hw cache events table */
	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
				       [PERF_COUNT_HW_CACHE_OP_MAX]
				       [PERF_COUNT_HW_CACHE_RESULT_MAX];
	int		(*map_hw_event)(u64);	 /* method used to map
						    hw events */
	int		(*map_cache_event)(u64); /* method used to map
						    cache events */

	u64		max_period;		/* max sampling period */
	u64		cntval_mask;		/* counter width mask */
	int		cntval_bits;		/* counter width */
	int		max_events;		/* max generic hw events
						   in map */
	int		num_counters;		/* number of base + aux counters */
	int		num_base_counters;	/* number of base counters */
};
DEFINE_PER_CPU(u64, perf_irqs);
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

#define TILE_OP_UNSUPP		(-1)
#ifndef __tilegx__
/* TILEPro hardware events map */
static const int tile_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x01, /* ONE */
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x06, /* MP_BUNDLE_RETIRED */
	[PERF_COUNT_HW_CACHE_REFERENCES]	= TILE_OP_UNSUPP,
	[PERF_COUNT_HW_CACHE_MISSES]		= TILE_OP_UNSUPP,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x16, /*
					MP_CONDITIONAL_BRANCH_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x14, /*
					MP_CONDITIONAL_BRANCH_MISSPREDICT */
	[PERF_COUNT_HW_BUS_CYCLES]		= TILE_OP_UNSUPP,
};
#else
/* TILEGx hardware events map */
static const int tile_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x181, /* ONE */
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0xdb, /* INSTRUCTION_BUNDLE */
	[PERF_COUNT_HW_CACHE_REFERENCES]	= TILE_OP_UNSUPP,
	[PERF_COUNT_HW_CACHE_MISSES]		= TILE_OP_UNSUPP,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0xd9, /*
						COND_BRANCH_PRED_CORRECT */
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0xda, /*
						COND_BRANCH_PRED_INCORRECT */
	[PERF_COUNT_HW_BUS_CYCLES]		= TILE_OP_UNSUPP,
};
#endif
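
/*
 * These codes are hardware event numbers written into the EVENT field
 * of the counter control registers. Raw events (PERF_TYPE_RAW) bypass
 * these tables and use attr->config & TILE_EVENT_MASK directly; see
 * __tile_event_init().
 */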
#define C(x) PERF_COUNT_HW_CACHE_##x

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of -1 means
 * 'not supported', any other value means the
 * raw hw_event ID.
 */
#ifndef __tilegx__
/* TILEPro hardware cache event map */
static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
				     [PERF_COUNT_HW_CACHE_OP_MAX]
				     [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0x21, /* RD_MISS */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0x22, /* WR_MISS */
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */
		[C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
};
#else
/* TILEGx hardware cache event map */
static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
				     [PERF_COUNT_HW_CACHE_OP_MAX]
				     [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	/*
	 * Like some other architectures (e.g. ARM), the performance
	 * counters don't differentiate between read and write
	 * accesses/misses, so this isn't strictly correct, but it's the
	 * best we can do. Writes and reads get combined.
	 */
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0x44, /* RD_MISS */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0x45, /* WR_MISS */
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */
		[C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */
		[C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
		[C(RESULT_MISS)] = TILE_OP_UNSUPP,
	},
},
};
#endif
static atomic_t tile_active_events;
static DEFINE_MUTEX(perf_intr_reserve_mutex);

static int tile_map_hw_event(u64 config);
static int tile_map_cache_event(u64 config);

static int tile_pmu_handle_irq(struct pt_regs *regs, int fault);

/*
 * To avoid new_raw_count getting larger than prev_raw_count
 * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1.
 */
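/*
 * A counter is programmed with the two's complement of its sampling
 * period (see tile_event_set_period() below), so it counts up and
 * wraps past zero after exactly "period" events. For example, a
 * period of 100 loads a 32-bit counter with 2^32 - 100, which is
 * expected to raise the overflow interrupt after 100 more events.
 */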
static const struct tile_pmu tilepmu = {
#ifndef __tilegx__
	.name = "tilepro",
#else
	.name = "tilegx",
#endif
	.max_events = ARRAY_SIZE(tile_hw_event_map),
	.map_hw_event = tile_map_hw_event,
	.hw_events = tile_hw_event_map,
	.map_cache_event = tile_map_cache_event,
	.cache_events = &tile_cache_event_map,
	.cntval_bits = 32,
	.cntval_mask = (1ULL << 32) - 1,
	.max_period = (1ULL << 31) - 1,
	.num_counters = TILE_MAX_COUNTERS,
	.num_base_counters = TILE_BASE_COUNTERS,
};
static const struct tile_pmu *tile_pmu __read_mostly;

/*
 * Check whether perf event is enabled.
 */
int tile_perf_enabled(void)
{
	return atomic_read(&tile_active_events) != 0;
}
/*
 * Read Performance Counters.
 */
static inline u64 read_counter(int idx)
{
	u64 val = 0;

	/* __insn_mfspr() only takes an immediate argument */
	switch (idx) {
	case PERF_COUNT_0_IDX:
		val = __insn_mfspr(SPR_PERF_COUNT_0);
		break;
	case PERF_COUNT_1_IDX:
		val = __insn_mfspr(SPR_PERF_COUNT_1);
		break;
	case AUX_PERF_COUNT_0_IDX:
		val = __insn_mfspr(SPR_AUX_PERF_COUNT_0);
		break;
	case AUX_PERF_COUNT_1_IDX:
		val = __insn_mfspr(SPR_AUX_PERF_COUNT_1);
		break;
	default:
		WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX ||
			     idx < PERF_COUNT_0_IDX);
		break;
	}

	return val;
}
/*
 * Write Performance Counters.
 */
static inline void write_counter(int idx, u64 value)
{
	/* __insn_mtspr() only takes an immediate argument */
	switch (idx) {
	case PERF_COUNT_0_IDX:
		__insn_mtspr(SPR_PERF_COUNT_0, value);
		break;
	case PERF_COUNT_1_IDX:
		__insn_mtspr(SPR_PERF_COUNT_1, value);
		break;
	case AUX_PERF_COUNT_0_IDX:
		__insn_mtspr(SPR_AUX_PERF_COUNT_0, value);
		break;
	case AUX_PERF_COUNT_1_IDX:
		__insn_mtspr(SPR_AUX_PERF_COUNT_1, value);
		break;
	default:
		WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX ||
			     idx < PERF_COUNT_0_IDX);
		break;
	}
}
/*
 * Enable performance event by setting
 * Performance Counter Control registers.
 */
static inline void tile_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned long cfg, mask;
	int shift, idx = hwc->idx;

	/*
	 * Prevent early activation from tile_pmu_start() in hw_perf_enable.
	 */
	if (WARN_ON_ONCE(idx == -1))
		return;

	if (idx < tile_pmu->num_base_counters)
		cfg = __insn_mfspr(SPR_PERF_COUNT_CTL);
	else
		cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL);

	switch (idx) {
	case PERF_COUNT_0_IDX:
	case AUX_PERF_COUNT_0_IDX:
		mask = TILE_EVENT_MASK;
		shift = 0;
		break;
	case PERF_COUNT_1_IDX:
	case AUX_PERF_COUNT_1_IDX:
		mask = TILE_EVENT_MASK << 16;
		shift = 16;
		break;
	default:
		WARN_ON_ONCE(idx < PERF_COUNT_0_IDX ||
			     idx > AUX_PERF_COUNT_1_IDX);
		return;
	}

	/* Clear mask bits to enable the event. */
	cfg &= ~mask;
	cfg |= hwc->config << shift;

	if (idx < tile_pmu->num_base_counters)
		__insn_mtspr(SPR_PERF_COUNT_CTL, cfg);
	else
		__insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg);
}
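
/*
 * Each control register packs the configuration for two counters:
 * counter 0 (or aux counter 0) in the low 16 bits and counter 1 (or
 * aux counter 1) in the high 16 bits, hence the 16-bit shift above.
 */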
/*
 * Disable performance event by clearing
 * Performance Counter Control registers.
 */
static inline void tile_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned long cfg, mask;
	int idx = hwc->idx;

	if (idx == -1)
		return;

	if (idx < tile_pmu->num_base_counters)
		cfg = __insn_mfspr(SPR_PERF_COUNT_CTL);
	else
		cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL);

	switch (idx) {
	case PERF_COUNT_0_IDX:
	case AUX_PERF_COUNT_0_IDX:
		mask = TILE_PLM_MASK;
		break;
	case PERF_COUNT_1_IDX:
	case AUX_PERF_COUNT_1_IDX:
		mask = TILE_PLM_MASK << 16;
		break;
	default:
		WARN_ON_ONCE(idx < PERF_COUNT_0_IDX ||
			     idx > AUX_PERF_COUNT_1_IDX);
		return;
	}

	/* Set mask bits to disable the event. */
	cfg |= mask;

	if (idx < tile_pmu->num_base_counters)
		__insn_mtspr(SPR_PERF_COUNT_CTL, cfg);
	else
		__insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg);
}
/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the delta events processed.
 */
static u64 tile_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - tile_pmu->cntval_bits;
	u64 prev_raw_count, new_raw_count;
	u64 oldval;
	int idx = hwc->idx;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = read_counter(idx);

	oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
				 new_raw_count);
	if (oldval != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
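
/*
 * The shift by (64 - cntval_bits) in tile_perf_event_update() keeps
 * the subtraction correct across a 32-bit counter wrap. For example,
 * with prev_raw_count = 0xfffffff0 and new_raw_count = 0x10, the
 * shifted subtraction yields a delta of 0x20: the 32 events that
 * actually elapsed.
 */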
/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
static int tile_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > tile_pmu->max_period)
		left = tile_pmu->max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future deltas:
	 */
	local64_set(&hwc->prev_count, (u64)-left);

	write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask);

	perf_event_update_userpage(event);

	return ret;
}
/*
 * Stop the event but do not release the PMU counter.
 */
static void tile_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (__test_and_clear_bit(idx, cpuc->active_mask)) {
		tile_pmu_disable_event(event);
		cpuc->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		tile_perf_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}
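
/*
 * PERF_HES_STOPPED means the counter is no longer counting;
 * PERF_HES_UPTODATE means the pending delta has been folded into
 * event->count. tile_pmu_start() expects both to be set before a
 * PERF_EF_RELOAD restart.
 */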
/*
 * Start an event (without re-assigning counter).
 */
static void tile_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		tile_event_set_period(event);
	}

	event->hw.state = 0;

	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);

	unmask_pmc_interrupts();

	tile_pmu_enable_event(event);

	perf_event_update_userpage(event);
}
/*
 * Add a single event to the PMU.
 *
 * The event is added to the group of enabled events
 * but only if it can be scheduled with existing events.
 */
static int tile_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc;
	unsigned long mask;
	int b, max_cnt;

	hwc = &event->hw;

	/*
	 * We are full.
	 */
	if (cpuc->n_events == tile_pmu->num_counters)
		return -ENOSPC;

	cpuc->event_list[cpuc->n_events] = event;
	cpuc->n_events++;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	/*
	 * Find first empty counter.
	 */
	max_cnt = tile_pmu->num_counters;
	mask = ~cpuc->used_mask;
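
	/*
	 * used_mask has one bit per hardware counter, so searching the
	 * complement finds the lowest free counter: e.g. with
	 * used_mask = 0b0101, the first set bit of ~used_mask is bit 1,
	 * and counter 1 gets assigned.
	 */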
	/* Find next free counter. */
	b = find_next_bit(&mask, max_cnt, 0);

	/* Should not happen. */
	if (WARN_ON_ONCE(b == max_cnt))
		return -ENOSPC;

	/*
	 * Assign counter to event.
	 */
	event->hw.idx = b;
	__set_bit(b, &cpuc->used_mask);

	/*
	 * Start if requested.
	 */
	if (flags & PERF_EF_START)
		tile_pmu_start(event, PERF_EF_RELOAD);

	return 0;
}
/*
 * Delete a single event from the PMU.
 *
 * The event is deleted from the group of enabled events.
 * If it is the last event, disable the PMU interrupt.
 */
static void tile_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	/*
	 * Remove event from list, compact list if necessary.
	 */
	for (i = 0; i < cpuc->n_events; i++) {
		if (cpuc->event_list[i] == event) {
			while (++i < cpuc->n_events)
				cpuc->event_list[i-1] = cpuc->event_list[i];
			--cpuc->n_events;
			cpuc->events[event->hw.idx] = NULL;
			__clear_bit(event->hw.idx, &cpuc->used_mask);
			tile_pmu_stop(event, PERF_EF_UPDATE);
			break;
		}
	}
	/*
	 * If there are no events left, then mask the PMU interrupt.
	 */
	if (cpuc->n_events == 0)
		mask_pmc_interrupts();
	perf_event_update_userpage(event);
}
/*
 * Propagate event elapsed time into the event.
 */
static inline void tile_pmu_read(struct perf_event *event)
{
	tile_perf_event_update(event);
}
/*
 * Map generic events to Tile PMU.
 */
static int tile_map_hw_event(u64 config)
{
	if (config >= tile_pmu->max_events)
		return -EINVAL;
	return tile_pmu->hw_events[config];
}
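
/*
 * A PERF_TYPE_HW_CACHE config packs (type, op, result) one byte each:
 * config = type | (op << 8) | (result << 16). For example, an L1D
 * read-miss is C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16),
 * which the cache event tables above map to 0x21 (RD_MISS) on TILEPro.
 */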
/*
 * Map generic hardware cache events to Tile PMU.
 */
static int tile_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	int code;

	if (!tile_pmu->cache_events)
		return -ENOENT;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result];
	if (code == TILE_OP_UNSUPP)
		return -EINVAL;

	return code;
}
static void tile_event_destroy(struct perf_event *event)
{
	if (atomic_dec_return(&tile_active_events) == 0)
		release_pmc_hardware();
}
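
/*
 * The PMC hardware is shared with oprofile: the first active event
 * reserves it and installs tile_pmu_handle_irq() (see
 * tile_event_init() below), and destroying the last event releases
 * it again.
 */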
static int __tile_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	int code;

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		code = tile_pmu->map_hw_event(attr->config);
		break;
	case PERF_TYPE_HW_CACHE:
		code = tile_pmu->map_cache_event(attr->config);
		break;
	case PERF_TYPE_RAW:
		code = attr->config & TILE_EVENT_MASK;
		break;
	default:
		/* Should not happen. */
		return -EOPNOTSUPP;
	}

	if (code < 0)
		return code;

	hwc->config = code;
	hwc->idx = -1;

	if (attr->exclude_user)
		hwc->config |= TILE_CTL_EXCL_USER;

	if (attr->exclude_kernel)
		hwc->config |= TILE_CTL_EXCL_KERNEL;

	if (attr->exclude_hv)
		hwc->config |= TILE_CTL_EXCL_HV;

	if (!hwc->sample_period) {
		hwc->sample_period = tile_pmu->max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}
	event->destroy = tile_event_destroy;
	return 0;
}
static int tile_event_init(struct perf_event *event)
{
	int err = 0;
	perf_irq_t old_irq_handler = NULL;

	if (atomic_inc_return(&tile_active_events) == 1)
		old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq);

	if (old_irq_handler) {
		pr_warn("PMC hardware busy (reserved by oprofile)\n");

		atomic_dec(&tile_active_events);
		return -EBUSY;
	}

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		return -ENOENT;
	}

	err = __tile_event_init(event);
	if (err) {
		if (event->destroy)
			event->destroy(event);
	}
	return err;
}
static struct pmu tilera_pmu = {
	.event_init	= tile_event_init,
	.add		= tile_pmu_add,
	.del		= tile_pmu_del,

	.start		= tile_pmu_start,
	.stop		= tile_pmu_stop,

	.read		= tile_pmu_read,
};
/*
 * PMU IRQ handler. The PMU has two interrupts; they share this handler.
 */
int tile_pmu_handle_irq(struct pt_regs *regs, int fault)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event;
	struct hw_perf_event *hwc;
	u64 val;
	unsigned long status;
	int bit;

	__this_cpu_inc(perf_irqs);

	if (!atomic_read(&tile_active_events))
		return 0;

	status = pmc_get_overflow();
	pmc_ack_overflow(status);

	for_each_set_bit(bit, &status, tile_pmu->num_counters) {

		event = cpuc->events[bit];

		if (!event)
			continue;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		hwc = &event->hw;

		val = tile_perf_event_update(event);
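		/*
		 * The counter was loaded with a negative offset and
		 * counts up toward zero, so its top bit stays set until
		 * the full period has elapsed. If the top bit is still
		 * set, the period is not yet complete; skip this counter.
		 */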
		if (val & (1ULL << (tile_pmu->cntval_bits - 1)))
			continue;

		perf_sample_data_init(&data, 0, event->hw.last_period);
		if (!tile_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			tile_pmu_stop(event, 0);
	}

	return 0;
}
static bool __init supported_pmu(void)
{
	tile_pmu = &tilepmu;
	return true;
}

int __init init_hw_perf_events(void)
{
	supported_pmu();
	perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW);
	return 0;
}
arch_initcall(init_hw_perf_events);
/* Callchain handling code. */

/*
 * Tile specific backtracing code for perf_events.
 */
static inline void perf_callchain(struct perf_callchain_entry_ctx *entry,
				  struct pt_regs *regs)
{
	struct KBacktraceIterator kbt;
	unsigned int i;

	/*
	 * Get the address just after the "jalr" instruction that
	 * jumps to the handler for a syscall. When we find this
	 * address in a backtrace, we silently ignore it, which gives
	 * us a one-step backtrace connection from the sys_xxx()
	 * function in the kernel to the xxx() function in libc.
	 * Otherwise, we lose the ability to properly attribute time
	 * from the libc calls to the kernel implementations, since
	 * oprofile only considers PCs from backtraces a pair at a time.
	 */
	unsigned long handle_syscall_pc = handle_syscall_link_address();

	KBacktraceIterator_init(&kbt, NULL, regs);
	kbt.profile = 1;

	/*
	 * The sample for the pc is already recorded. Now we are adding the
	 * address of the callsites on the stack. Our iterator starts
	 * with the frame of the (already sampled) call site. If our
	 * iterator contained a "return address" field, we could have just
	 * used it and wouldn't have needed to skip the first
	 * frame. That's in effect what the arm and x86 versions do.
	 * Instead we peel off the first iteration to get the equivalent
	 * behavior.
	 */
	if (KBacktraceIterator_end(&kbt))
		return;
	KBacktraceIterator_next(&kbt);

	/*
	 * Limit the stack depth to 16 frames each for user and kernel
	 * space, i.e. 32 stack frames in total.
	 */
	for (i = 0; i < 16; ++i) {
		unsigned long pc;

		if (KBacktraceIterator_end(&kbt))
			break;
		pc = kbt.it.pc;
		if (pc != handle_syscall_pc)
			perf_callchain_store(entry, pc);
		KBacktraceIterator_next(&kbt);
	}
}
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}