Git Repo - linux.git/commitdiff
Merge tag 'trace-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux...
author Linus Torvalds <[email protected]>
Mon, 29 Apr 2013 20:55:38 +0000 (13:55 -0700)
committer Linus Torvalds <[email protected]>
Mon, 29 Apr 2013 20:55:38 +0000 (13:55 -0700)
Pull tracing updates from Steven Rostedt:
 "Along with the usual minor fixes and clean ups there are a few major
  changes with this pull request.

   1) Multiple buffers for the ftrace facility

  This feature has been requested by many people over the last few
  years.  I even heard that Google was about to implement it themselves.
  I finally had time and cleaned up the code such that you can now
  create multiple instances of the ftrace buffer and have different
  events go to different buffers.  This way, a low-frequency event will
  not be lost in the noise of a high-frequency event.

  Note: currently only events can go to different buffers; the tracers
  (i.e. function, function_graph and the latency tracers) can still only
  write to the main buffer.
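
  As a quick usage sketch (assuming debugfs is mounted at the usual
  /sys/kernel/debug path; "foo" is just an example instance name), a
  separate buffer can be created and a single event routed into it:

      mkdir /sys/kernel/debug/tracing/instances/foo
      echo 1 > /sys/kernel/debug/tracing/instances/foo/events/sched/sched_switch/enable
      cat /sys/kernel/debug/tracing/instances/foo/trace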

   2) The function tracer triggers have now been extended.

  The function tracer previously had two triggers: one to enable tracing
  when a function is hit, and one to disable it.  Now you can also record
  a stack trace on a single function (or many), take a snapshot of the
  buffer (copy it to the snapshot buffer), and enable or disable an event
  to be traced when a function is hit.
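
  As a sketch of the trigger syntax (assuming the usual tracing debugfs
  layout; kfree and schedule are only example functions), the commands
  are written to set_ftrace_filter:

      echo 'kfree:stacktrace' > /sys/kernel/debug/tracing/set_ftrace_filter
      echo 'kfree:snapshot' >> /sys/kernel/debug/tracing/set_ftrace_filter
      echo 'schedule:enable_event:sched:sched_switch' >> /sys/kernel/debug/tracing/set_ftrace_filter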

   3) A perf clock has been added.

  A "perf" clock can be chosen to be used when tracing.  This will cause
  ftrace to use the same clock as perf uses, and hopefully this will
  make it easier to interleave the perf and ftrace data for analysis."
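
  For example (assuming the usual tracing debugfs path), the clock can
  be switched and inspected with:

      echo perf > /sys/kernel/debug/tracing/trace_clock
      cat /sys/kernel/debug/tracing/trace_clock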

* tag 'trace-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (82 commits)
  tracepoints: Prevent null probe from being added
  tracing: Compare to 1 instead of zero for is_signed_type()
  tracing: Remove obsolete macro guard _TRACE_PROFILE_INIT
  ftrace: Get rid of ftrace_profile_bits
  tracing: Check return value of tracing_init_dentry()
  tracing: Get rid of unneeded key calculation in ftrace_hash_move()
  tracing: Reset ftrace_graph_filter_enabled if count is zero
  tracing: Fix off-by-one on allocating stat->pages
  kernel: tracing: Use strlcpy instead of strncpy
  tracing: Update debugfs README file
  tracing: Fix ftrace_dump()
  tracing: Rename trace_event_mutex to trace_event_sem
  tracing: Fix comment about prefix in arch_syscall_match_sym_name()
  tracing: Convert trace_destroy_fields() to static
  tracing: Move find_event_field() into trace_events.c
  tracing: Use TRACE_MAX_PRINT instead of constant
  tracing: Use pr_warn_once instead of open coded implementation
  ring-buffer: Add ring buffer startup selftest
  tracing: Bring Documentation/trace/ftrace.txt up to date
  tracing: Add "perf" trace_clock
  ...

Conflicts:
kernel/trace/ftrace.c
kernel/trace/trace.c

Documentation/kernel-parameters.txt
include/linux/ftrace.h
include/linux/kernel.h
kernel/trace/Kconfig
kernel/trace/blktrace.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_output.c
kernel/trace/trace_stack.c
kernel/tracepoint.c

diff --combined Documentation/kernel-parameters.txt
index 8ccbf27aead4def123c94eaf0de40e39db40199f,0edc409f9eded380ec0d15384505ae6d325dfb73..5abc09a93bc274d5c2453b6038a6cfba8f215c13
@@@ -320,6 -320,13 +320,13 @@@ bytes respectively. Such letter suffixe
                        on: enable for both 32- and 64-bit processes
                        off: disable for both 32- and 64-bit processes
  
+       alloc_snapshot  [FTRACE]
+                       Allocate the ftrace snapshot buffer on boot up when the
+                       main buffer is allocated. This is handy if debugging
+                       and you need to use tracing_snapshot() on boot up, and
+                       do not want to use tracing_snapshot_alloc() as it needs
+                       to be done where GFP_KERNEL allocations are allowed.
        amd_iommu=      [HW,X86-64]
                        Pass parameters to the AMD IOMMU driver in the system.
                        Possible values are:
                        UART at the specified I/O port or MMIO address,
                        switching to the matching ttyS device later.  The
                        options are the same as for ttyS, above.
 +              hvc<n>  Use the hypervisor console device <n>. This is for
 +                      both Xen and PowerPC hypervisors.
  
                  If the device connected to the port is not a TTY but a braille
                  device, prepend "brl," before the device type, for instance
                        a memory unit (amount[KMG]). See also
                        Documentation/kdump/kdump.txt for an example.
  
 +      crashkernel=size[KMG],high
 +                      [KNL, x86_64] range could be above 4G. Allow kernel
 +                      to allocate physical memory region from top, so could
 +                      be above 4G if system have more than 4G ram installed.
 +                      Otherwise memory region will be allocated below 4G, if
 +                      available.
 +                      It will be ignored if crashkernel=X is specified.
 +      crashkernel=size[KMG],low
 +                      [KNL, x86_64] range under 4G. When crashkernel=X,high
 +                      is passed, kernel could allocate physical memory region
 +                      above 4G, that cause second kernel crash on system
 +                      that require some amount of low memory, e.g. swiotlb
 +                      requires at least 64M+32K low memory.  Kernel would
 +                      try to allocate 72M below 4G automatically.
 +                      This one let user to specify own low range under 4G
 +                      for second kernel instead.
 +                      0: to disable low allocation.
 +                      It will be ignored when crashkernel=X,high is not used
 +                      or memory reserved is below 4G.
 +
        cs89x0_dma=     [HW,NET]
                        Format: <dma>
  
  
        earlyprintk=    [X86,SH,BLACKFIN]
                        earlyprintk=vga
 +                      earlyprintk=xen
                        earlyprintk=serial[,ttySn[,baudrate]]
                        earlyprintk=ttySn[,baudrate]
                        earlyprintk=dbgp[debugController#]
                        The VGA output is eventually overwritten by the real
                        console.
  
 +                      The xen output can only be used by Xen PV guests.
 +
        ekgdboc=        [X86,KGDB] Allow early kernel console debugging
                        ekgdboc=kbd
  
        edd=            [EDD]
                        Format: {"off" | "on" | "skip[mbr]"}
  
 +      efi_no_storage_paranoia [EFI; X86]
 +                      Using this parameter you can use more than 50% of
 +                      your efi variable storage. Use this parameter only if
 +                      you are really sure that your UEFI does sane gc and
 +                      fulfills the spec otherwise your board may brick.
 +
        eisa_irq_edge=  [PARISC,HW]
                        See header of drivers/parisc/eisa.c.
  
                               If specified, z/VM IUCV HVC accepts connections
                               from listed z/VM user IDs only.
  
 +      hwthread_map=   [METAG] Comma-separated list of Linux cpu id to
 +                              hardware thread id mappings.
 +                              Format: <cpu>:<hwthread>
 +
        keep_bootcon    [KNL]
                        Do not unregister boot console at start. This is only
                        useful for debugging when something happens in the window
                        Claim all unknown PCI IDE storage controllers.
  
        idle=           [X86]
 -                      Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
 +                      Format: idle=poll, idle=halt, idle=nomwait
                        Poll forces a polling idle loop that can slightly
                        improve the performance of waking up a idle CPU, but
                        will use a lot of power and make the system run hot.
                        Not recommended.
 -                      idle=mwait: On systems which support MONITOR/MWAIT but
 -                      the kernel chose to not use it because it doesn't save
 -                      as much power as a normal idle loop, use the
 -                      MONITOR/MWAIT idle loop anyways. Performance should be
 -                      the same as idle=poll.
                        idle=halt: Halt is forced to be used for CPU idle.
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
                        0       disables intel_idle and fall back on acpi_idle.
                        1 to 6  specify maximum depth of C-state.
  
 +      intel_pstate=  [X86]
 +                     disable
 +                       Do not enable intel_pstate as the default
 +                       scaling driver for the supported processors
 +
        intremap=       [X86-64, Intel-IOMMU]
                        on      enable Interrupt Remapping (default)
                        off     disable Interrupt Remapping
                        wfi(ARM) instruction doesn't work correctly and not to
                        use it. This is also useful when using JTAG debugger.
  
 -      no-hlt          [BUGS=X86-32] Tells the kernel that the hlt
 -                      instruction doesn't work correctly and not to
 -                      use it.
 -
        no_file_caps    Tells the kernel not to honor file capabilities.  The
                        only way then for a file to be executed with privilege
                        is to be setuid root or executed by root.
                                This sorting is done to get a device
                                order compatible with older (<= 2.4) kernels.
                nobfsort        Don't sort PCI devices into breadth-first order.
 +              pcie_bus_tune_off       Disable PCIe MPS (Max Payload Size)
 +                              tuning and use the BIOS-configured MPS defaults.
 +              pcie_bus_safe   Set every device's MPS to the largest value
 +                              supported by all devices below the root complex.
 +              pcie_bus_perf   Set device MPS to the largest allowable MPS
 +                              based on its parent bus. Also set MRRS (Max
 +                              Read Request Size) to the largest supported
 +                              value (no larger than the MPS that the device
 +                              or bus can support) for best performance.
 +              pcie_bus_peer2peer      Set every device's MPS to 128B, which
 +                              every device is guaranteed to support. This
 +                              configuration allows peer-to-peer DMA between
 +                              any pair of devices, possibly at the cost of
 +                              reduced performance.  This also guarantees
 +                              that hot-added devices will work.
                cbiosize=nn[KMG]        The fixed amount of bus space which is
                                reserved for the CardBus bridge's IO window.
                                The default value is 256 bytes.
                                the default.
                                off: Turn ECRC off
                                on: Turn ECRC on.
 +              hpiosize=nn[KMG]        The fixed amount of bus space which is
 +                              reserved for hotplug bridge's IO window.
 +                              Default size is 256 bytes.
 +              hpmemsize=nn[KMG]       The fixed amount of bus space which is
 +                              reserved for hotplug bridge's memory window.
 +                              Default size is 2 megabytes.
                realloc=        Enable/disable reallocating PCI bridge resources
                                if allocations done by BIOS are too small to
                                accommodate resources required by all child
diff --combined include/linux/ftrace.h
index 52da2a250795f3aefc3d2acc927a229f8a662d59,832422d706f41073420defb07c11cf914f3660b6..f83e17a40e8b848185b649836274b57acdc332c1
@@@ -89,7 -89,6 +89,7 @@@ typedef void (*ftrace_func_t)(unsigned 
   *            that the call back has its own recursion protection. If it does
   *            not set this, then the ftrace infrastructure will add recursion
   *            protection for the caller.
 + * STUB   - The ftrace_ops is just a place holder.
   */
  enum {
        FTRACE_OPS_FL_ENABLED                   = 1 << 0,
@@@ -99,7 -98,6 +99,7 @@@
        FTRACE_OPS_FL_SAVE_REGS                 = 1 << 4,
        FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED    = 1 << 5,
        FTRACE_OPS_FL_RECURSION_SAFE            = 1 << 6,
 +      FTRACE_OPS_FL_STUB                      = 1 << 7,
  };
  
  struct ftrace_ops {
@@@ -261,8 -259,10 +261,10 @@@ struct ftrace_probe_ops 
        void                    (*func)(unsigned long ip,
                                        unsigned long parent_ip,
                                        void **data);
-       int                     (*callback)(unsigned long ip, void **data);
-       void                    (*free)(void **data);
+       int                     (*init)(struct ftrace_probe_ops *ops,
+                                       unsigned long ip, void **data);
+       void                    (*free)(struct ftrace_probe_ops *ops,
+                                       unsigned long ip, void **data);
        int                     (*print)(struct seq_file *m,
                                         unsigned long ip,
                                         struct ftrace_probe_ops *ops,
@@@ -396,6 -396,7 +398,6 @@@ ssize_t ftrace_filter_write(struct fil
                            size_t cnt, loff_t *ppos);
  ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
                             size_t cnt, loff_t *ppos);
 -loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence);
  int ftrace_regex_release(struct inode *inode, struct file *file);
  
  void __init
@@@ -568,8 -569,6 +570,8 @@@ static inline in
  ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
  #endif /* CONFIG_DYNAMIC_FTRACE */
  
 +loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence);
 +
  /* totally disable ftrace - can not re-enable after this */
  void ftrace_kill(void);
  
diff --combined include/linux/kernel.h
index 79fdd80a42d4f001157bc70b9570aa5eda0b8f6c,239dbb9627caecdd0e1b59d5a218d5ad83a0380f..2dac79c3919903c84b09ae8cc9f3148bbaf00214
@@@ -390,6 -390,7 +390,6 @@@ extern struct pid *session_of_pgrp(stru
  unsigned long int_sqrt(unsigned long);
  
  extern void bust_spinlocks(int yes);
 -extern void wake_up_klogd(void);
  extern int oops_in_progress;          /* If set, an oops, panic(), BUG() or die() is in progress */
  extern int panic_timeout;
  extern int panic_on_oops;
@@@ -397,11 -398,7 +397,11 @@@ extern int panic_on_unrecovered_nmi
  extern int panic_on_io_nmi;
  extern int sysctl_panic_on_stackoverflow;
  extern const char *print_tainted(void);
 -extern void add_taint(unsigned flag);
 +enum lockdep_ok {
 +      LOCKDEP_STILL_OK,
 +      LOCKDEP_NOW_UNRELIABLE
 +};
 +extern void add_taint(unsigned flag, enum lockdep_ok);
  extern int test_taint(unsigned flag);
  extern unsigned long get_taint(void);
  extern int root_mountflags;
@@@ -486,6 -483,8 +486,8 @@@ enum ftrace_dump_mode 
  void tracing_on(void);
  void tracing_off(void);
  int tracing_is_on(void);
+ void tracing_snapshot(void);
+ void tracing_snapshot_alloc(void);
  
  extern void tracing_start(void);
  extern void tracing_stop(void);
@@@ -515,10 -514,32 +517,32 @@@ do {                                                                    
   *
   * This is intended as a debugging tool for the developer only.
   * Please refrain from leaving trace_printks scattered around in
-  * your code.
+  * your code. (Extra memory is used for special buffers that are
+  * allocated when trace_printk() is used)
+  *
+  * A little optization trick is done here. If there's only one
+  * argument, there's no need to scan the string for printf formats.
+  * The trace_puts() will suffice. But how can we take advantage of
+  * using trace_puts() when trace_printk() has only one argument?
+  * By stringifying the args and checking the size we can tell
+  * whether or not there are args. __stringify((__VA_ARGS__)) will
+  * turn into "()\0" with a size of 3 when there are no args, anything
+  * else will be bigger. All we need to do is define a string to this,
+  * and then take its size and compare to 3. If it's bigger, use
+  * do_trace_printk() otherwise, optimize it to trace_puts(). Then just
+  * let gcc optimize the rest.
   */
  
- #define trace_printk(fmt, args...)                                    \
+ #define trace_printk(fmt, ...)                                \
+ do {                                                  \
+       char _______STR[] = __stringify((__VA_ARGS__)); \
+       if (sizeof(_______STR) > 3)                     \
+               do_trace_printk(fmt, ##__VA_ARGS__);    \
+       else                                            \
+               trace_puts(fmt);                        \
+ } while (0)
+ #define do_trace_printk(fmt, args...)                                 \
  do {                                                                  \
        static const char *trace_printk_fmt                             \
                __attribute__((section("__trace_printk_fmt"))) =        \
@@@ -538,7 -559,45 +562,45 @@@ int __trace_bprintk(unsigned long ip, c
  extern __printf(2, 3)
  int __trace_printk(unsigned long ip, const char *fmt, ...);
  
- extern void trace_dump_stack(void);
+ /**
+  * trace_puts - write a string into the ftrace buffer
+  * @str: the string to record
+  *
+  * Note: __trace_bputs is an internal function for trace_puts and
+  *       the @ip is passed in via the trace_puts macro.
+  *
+  * This is similar to trace_printk() but is made for those really fast
+  * paths that a developer wants the least amount of "Heisenbug" affects,
+  * where the processing of the print format is still too much.
+  *
+  * This function allows a kernel developer to debug fast path sections
+  * that printk is not appropriate for. By scattering in various
+  * printk like tracing in the code, a developer can quickly see
+  * where problems are occurring.
+  *
+  * This is intended as a debugging tool for the developer only.
+  * Please refrain from leaving trace_puts scattered around in
+  * your code. (Extra memory is used for special buffers that are
+  * allocated when trace_puts() is used)
+  *
+  * Returns: 0 if nothing was written, positive # if string was.
+  *  (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
+  */
+ extern int __trace_bputs(unsigned long ip, const char *str);
+ extern int __trace_puts(unsigned long ip, const char *str, int size);
+ #define trace_puts(str) ({                                            \
+       static const char *trace_printk_fmt                             \
+               __attribute__((section("__trace_printk_fmt"))) =        \
+               __builtin_constant_p(str) ? str : NULL;                 \
+                                                                       \
+       if (__builtin_constant_p(str))                                  \
+               __trace_bputs(_THIS_IP_, trace_printk_fmt);             \
+       else                                                            \
+               __trace_puts(_THIS_IP_, str, strlen(str));              \
+ })
+ extern void trace_dump_stack(int skip);
  
  /*
   * The double __builtin_constant_p is because gcc will give us an error
@@@ -573,6 -632,8 +635,8 @@@ static inline void trace_dump_stack(voi
  static inline void tracing_on(void) { }
  static inline void tracing_off(void) { }
  static inline int tracing_is_on(void) { return 0; }
+ static inline void tracing_snapshot(void) { }
+ static inline void tracing_snapshot_alloc(void) { }
  
  static inline __printf(1, 2)
  int trace_printk(const char *fmt, ...)
diff --combined kernel/trace/Kconfig
index fc382d6e2765d4a454a85aa5591c8edc452dccfb,0b5ecf5517fa6d3bb1464a9f183396b07253fbd8..5e9efd4b83a47fda4e70825078baabbc090a9a72
@@@ -81,6 -81,21 +81,6 @@@ config EVENT_TRACIN
        select CONTEXT_SWITCH_TRACER
        bool
  
 -config EVENT_POWER_TRACING_DEPRECATED
 -      depends on EVENT_TRACING
 -      bool "Deprecated power event trace API, to be removed"
 -      default y
 -      help
 -        Provides old power event types:
 -        C-state/idle accounting events:
 -        power:power_start
 -        power:power_end
 -        and old cpufreq accounting event:
 -        power:power_frequency
 -        This is for userspace compatibility
 -        and will vanish after 5 kernel iterations,
 -        namely 3.1.
 -
  config CONTEXT_SWITCH_TRACER
        bool
  
@@@ -176,6 -191,8 +176,8 @@@ config IRQSOFF_TRACE
        select GENERIC_TRACER
        select TRACER_MAX_TRACE
        select RING_BUFFER_ALLOW_SWAP
+       select TRACER_SNAPSHOT
+       select TRACER_SNAPSHOT_PER_CPU_SWAP
        help
          This option measures the time spent in irqs-off critical
          sections, with microsecond accuracy.
@@@ -198,6 -215,8 +200,8 @@@ config PREEMPT_TRACE
        select GENERIC_TRACER
        select TRACER_MAX_TRACE
        select RING_BUFFER_ALLOW_SWAP
+       select TRACER_SNAPSHOT
+       select TRACER_SNAPSHOT_PER_CPU_SWAP
        help
          This option measures the time spent in preemption-off critical
          sections, with microsecond accuracy.
@@@ -217,6 -236,7 +221,7 @@@ config SCHED_TRACE
        select GENERIC_TRACER
        select CONTEXT_SWITCH_TRACER
        select TRACER_MAX_TRACE
+       select TRACER_SNAPSHOT
        help
          This tracer tracks the latency of the highest priority task
          to be scheduled in, starting from the point it has woken up.
@@@ -248,6 -268,27 +253,27 @@@ config TRACER_SNAPSHO
              echo 1 > /sys/kernel/debug/tracing/snapshot
              cat snapshot
  
+ config TRACER_SNAPSHOT_PER_CPU_SWAP
+         bool "Allow snapshot to swap per CPU"
+       depends on TRACER_SNAPSHOT
+       select RING_BUFFER_ALLOW_SWAP
+       help
+         Allow doing a snapshot of a single CPU buffer instead of a
+         full swap (all buffers). If this is set, then the following is
+         allowed:
+             echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
+         After which, only the tracing buffer for CPU 2 was swapped with
+         the main tracing buffer, and the other CPU buffers remain the same.
+         When this is enabled, this adds a little more overhead to the
+         trace recording, as it needs to add some checks to synchronize
+         recording with swaps. But this does not affect the performance
+         of the overall system. This is enabled by default when the preempt
+         or irq latency tracers are enabled, as those need to swap as well
+         and already adds the overhead (plus a lot more).
  config TRACE_BRANCH_PROFILING
        bool
        select GENERIC_TRACER
@@@ -524,6 -565,29 +550,29 @@@ config RING_BUFFER_BENCHMAR
  
          If unsure, say N.
  
+ config RING_BUFFER_STARTUP_TEST
+        bool "Ring buffer startup self test"
+        depends on RING_BUFFER
+        help
+          Run a simple self test on the ring buffer on boot up. Late in the
+        kernel boot sequence, the test will start that kicks off
+        a thread per cpu. Each thread will write various size events
+        into the ring buffer. Another thread is created to send IPIs
+        to each of the threads, where the IPI handler will also write
+        to the ring buffer, to test/stress the nesting ability.
+        If any anomalies are discovered, a warning will be displayed
+        and all ring buffers will be disabled.
+        The test runs for 10 seconds. This will slow your boot time
+        by at least 10 more seconds.
+        At the end of the test, statics and more checks are done.
+        It will output the stats of each per cpu buffer. What
+        was written, the sizes, what was read, what was lost, and
+        other similar details.
+        If unsure, say N
  endif # FTRACE
  
  endif # TRACING_SUPPORT
diff --combined kernel/trace/blktrace.c
index 5a0f781cd729870892d105f24cc552a9d28b23e6,90a55054744cbab49bd3d7439562eae744f296a3..ed58a3216a6dd04ffe5c7055e850bf691a62c768
@@@ -72,7 -72,7 +72,7 @@@ static void trace_note(struct blk_trac
        bool blk_tracer = blk_tracer_enabled;
  
        if (blk_tracer) {
-               buffer = blk_tr->buffer;
+               buffer = blk_tr->trace_buffer.buffer;
                pc = preempt_count();
                event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
                                                  sizeof(*t) + len,
@@@ -218,7 -218,7 +218,7 @@@ static void __blk_add_trace(struct blk_
        if (blk_tracer) {
                tracing_record_cmdline(current);
  
-               buffer = blk_tr->buffer;
+               buffer = blk_tr->trace_buffer.buffer;
                pc = preempt_count();
                event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
                                                  sizeof(*t) + pdu_len,
@@@ -783,7 -783,6 +783,7 @@@ static void blk_add_trace_bio_complete(
  
  static void blk_add_trace_bio_backmerge(void *ignore,
                                        struct request_queue *q,
 +                                      struct request *rq,
                                        struct bio *bio)
  {
        blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
  
  static void blk_add_trace_bio_frontmerge(void *ignore,
                                         struct request_queue *q,
 +                                       struct request *rq,
                                         struct bio *bio)
  {
        blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
diff --combined kernel/trace/ftrace.c
index b3fde6d7b7fc47683244a5a432ab35a543551499,9b44abb2c5a0bf585ff73de22c07031af35b76da..8a5c017bb50c141bfca4d8206ac2a1805d224185
@@@ -66,7 -66,7 +66,7 @@@
  
  static struct ftrace_ops ftrace_list_end __read_mostly = {
        .func           = ftrace_stub,
 -      .flags          = FTRACE_OPS_FL_RECURSION_SAFE,
 +      .flags          = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
  };
  
  /* ftrace_enabled is a method to turn ftrace on or off */
@@@ -486,7 -486,6 +486,6 @@@ struct ftrace_profile_stat 
  #define PROFILES_PER_PAGE                                     \
        (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
  
- static int ftrace_profile_bits __read_mostly;
  static int ftrace_profile_enabled __read_mostly;
  
  /* ftrace_profile_lock - synchronize the enable and disable of the profiler */
@@@ -494,7 -493,8 +493,8 @@@ static DEFINE_MUTEX(ftrace_profile_lock
  
  static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
  
- #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+ #define FTRACE_PROFILE_HASH_BITS 10
+ #define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)
  
  static void *
  function_stat_next(void *v, int idx)
@@@ -676,7 -676,7 +676,7 @@@ int ftrace_profile_pages_init(struct ft
  
        pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
  
-       for (i = 0; i < pages; i++) {
+       for (i = 1; i < pages; i++) {
                pg->next = (void *)get_zeroed_page(GFP_KERNEL);
                if (!pg->next)
                        goto out_free;
                free_page(tmp);
        }
  
 -      free_page((unsigned long)stat->pages);
        stat->pages = NULL;
        stat->start = NULL;
  
@@@ -724,13 -725,6 +724,6 @@@ static int ftrace_profile_init_cpu(int 
        if (!stat->hash)
                return -ENOMEM;
  
-       if (!ftrace_profile_bits) {
-               size--;
-               for (; size; size >>= 1)
-                       ftrace_profile_bits++;
-       }
        /* Preallocate the function profiling pages */
        if (ftrace_profile_pages_init(stat) < 0) {
                kfree(stat->hash);
@@@ -761,15 -755,16 +754,15 @@@ ftrace_find_profiled_func(struct ftrace
  {
        struct ftrace_profile *rec;
        struct hlist_head *hhd;
 -      struct hlist_node *n;
        unsigned long key;
  
-       key = hash_long(ip, ftrace_profile_bits);
+       key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
        hhd = &stat->hash[key];
  
        if (hlist_empty(hhd))
                return NULL;
  
 -      hlist_for_each_entry_rcu(rec, n, hhd, node) {
 +      hlist_for_each_entry_rcu(rec, hhd, node) {
                if (rec->ip == ip)
                        return rec;
        }
@@@ -782,7 -777,7 +775,7 @@@ static void ftrace_add_profile(struct f
  {
        unsigned long key;
  
-       key = hash_long(rec->ip, ftrace_profile_bits);
+       key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
        hlist_add_head_rcu(&rec->node, &stat->hash[key]);
  }
  
@@@ -1052,19 -1047,6 +1045,19 @@@ static __init void ftrace_profile_debug
  
  static struct pid * const ftrace_swapper_pid = &init_struct_pid;
  
 +loff_t
 +ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
 +{
 +      loff_t ret;
 +
 +      if (file->f_mode & FMODE_READ)
 +              ret = seq_lseek(file, offset, whence);
 +      else
 +              file->f_pos = ret = 1;
 +
 +      return ret;
 +}
 +
  #ifdef CONFIG_DYNAMIC_FTRACE
  
  #ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@@ -1079,7 -1061,7 +1072,7 @@@ struct ftrace_func_probe 
        unsigned long           flags;
        unsigned long           ip;
        void                    *data;
-       struct rcu_head         rcu;
+       struct list_head        free_list;
  };
  
  struct ftrace_func_entry {
@@@ -1144,6 -1126,7 +1137,6 @@@ ftrace_lookup_ip(struct ftrace_hash *ha
        unsigned long key;
        struct ftrace_func_entry *entry;
        struct hlist_head *hhd;
 -      struct hlist_node *n;
  
        if (ftrace_hash_empty(hash))
                return NULL;
  
        hhd = &hash->buckets[key];
  
 -      hlist_for_each_entry_rcu(entry, n, hhd, hlist) {
 +      hlist_for_each_entry_rcu(entry, hhd, hlist) {
                if (entry->ip == ip)
                        return entry;
        }
@@@ -1212,7 -1195,7 +1205,7 @@@ remove_hash_entry(struct ftrace_hash *h
  static void ftrace_hash_clear(struct ftrace_hash *hash)
  {
        struct hlist_head *hhd;
 -      struct hlist_node *tp, *tn;
 +      struct hlist_node *tn;
        struct ftrace_func_entry *entry;
        int size = 1 << hash->size_bits;
        int i;
  
        for (i = 0; i < size; i++) {
                hhd = &hash->buckets[i];
 -              hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist)
 +              hlist_for_each_entry_safe(entry, tn, hhd, hlist)
                        free_hash_entry(hash, entry);
        }
        FTRACE_WARN_ON(hash->count);
@@@ -1285,6 -1268,7 +1278,6 @@@ alloc_and_copy_ftrace_hash(int size_bit
  {
        struct ftrace_func_entry *entry;
        struct ftrace_hash *new_hash;
 -      struct hlist_node *tp;
        int size;
        int ret;
        int i;
  
        size = 1 << hash->size_bits;
        for (i = 0; i < size; i++) {
 -              hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) {
 +              hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
                        ret = add_hash_entry(new_hash, entry->ip);
                        if (ret < 0)
                                goto free_hash;
@@@ -1325,11 -1309,10 +1318,10 @@@ ftrace_hash_move(struct ftrace_ops *ops
                 struct ftrace_hash **dst, struct ftrace_hash *src)
  {
        struct ftrace_func_entry *entry;
 -      struct hlist_node *tp, *tn;
 +      struct hlist_node *tn;
        struct hlist_head *hhd;
        struct ftrace_hash *old_hash;
        struct ftrace_hash *new_hash;
-       unsigned long key;
        int size = src->count;
        int bits = 0;
        int ret;
        size = 1 << src->size_bits;
        for (i = 0; i < size; i++) {
                hhd = &src->buckets[i];
 -              hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) {
 +              hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
-                       if (bits > 0)
-                               key = hash_long(entry->ip, bits);
-                       else
-                               key = 0;
                        remove_hash_entry(src, entry);
                        __add_hash_entry(new_hash, entry);
                }
@@@ -2625,7 -2604,7 +2613,7 @@@ static void ftrace_filter_reset(struct 
   * routine, you can use ftrace_filter_write() for the write
   * routine if @flag has FTRACE_ITER_FILTER set, or
   * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
 - * ftrace_regex_lseek() should be used as the lseek routine, and
 + * ftrace_filter_lseek() should be used as the lseek routine, and
   * release must call ftrace_regex_release().
   */
  int
@@@ -2709,6 -2688,19 +2697,6 @@@ ftrace_notrace_open(struct inode *inode
                                 inode, file);
  }
  
 -loff_t
 -ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 -{
 -      loff_t ret;
 -
 -      if (file->f_mode & FMODE_READ)
 -              ret = seq_lseek(file, offset, whence);
 -      else
 -              file->f_pos = ret = 1;
 -
 -      return ret;
 -}
 -
  static int ftrace_match(char *str, char *regex, int len, int type)
  {
        int matched = 0;
@@@ -2897,6 -2889,7 +2885,6 @@@ static void function_trace_probe_call(u
  {
        struct ftrace_func_probe *entry;
        struct hlist_head *hhd;
 -      struct hlist_node *n;
        unsigned long key;
  
        key = hash_long(ip, FTRACE_HASH_BITS);
         * on the hash. rcu_read_lock is too dangerous here.
         */
        preempt_disable_notrace();
 -      hlist_for_each_entry_rcu(entry, n, hhd, node) {
 +      hlist_for_each_entry_rcu(entry, hhd, node) {
                if (entry->ip == ip)
                        entry->ops->func(ip, parent_ip, &entry->data);
        }
@@@ -2973,28 -2966,27 +2961,27 @@@ static void __disable_ftrace_function_p
  }
  
  
- static void ftrace_free_entry_rcu(struct rcu_head *rhp)
+ static void ftrace_free_entry(struct ftrace_func_probe *entry)
  {
-       struct ftrace_func_probe *entry =
-               container_of(rhp, struct ftrace_func_probe, rcu);
        if (entry->ops->free)
-               entry->ops->free(&entry->data);
+               entry->ops->free(entry->ops, entry->ip, &entry->data);
        kfree(entry);
  }
  
  int
  register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                              void *data)
  {
        struct ftrace_func_probe *entry;
+       struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+       struct ftrace_hash *hash;
        struct ftrace_page *pg;
        struct dyn_ftrace *rec;
        int type, len, not;
        unsigned long key;
        int count = 0;
        char *search;
+       int ret;
  
        type = filter_parse_regex(glob, strlen(glob), &search, &not);
        len = strlen(search);
  
        mutex_lock(&ftrace_lock);
  
-       if (unlikely(ftrace_disabled))
+       hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+       if (!hash) {
+               count = -ENOMEM;
                goto out_unlock;
+       }
+       if (unlikely(ftrace_disabled)) {
+               count = -ENODEV;
+               goto out_unlock;
+       }
  
        do_for_each_ftrace_rec(pg, rec) {
  
                 * for each function we find. We call the callback
                 * to give the caller an opportunity to do so.
                 */
-               if (ops->callback) {
-                       if (ops->callback(rec->ip, &entry->data) < 0) {
+               if (ops->init) {
+                       if (ops->init(ops, rec->ip, &entry->data) < 0) {
                                /* caller does not like this func */
                                kfree(entry);
                                continue;
                        }
                }
  
+               ret = enter_record(hash, rec, 0);
+               if (ret < 0) {
+                       kfree(entry);
+                       count = ret;
+                       goto out_unlock;
+               }
                entry->ops = ops;
                entry->ip = rec->ip;
  
                hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
  
        } while_for_each_ftrace_rec();
+       ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+       if (ret < 0)
+               count = ret;
        __enable_ftrace_function_probe();
  
   out_unlock:
        mutex_unlock(&ftrace_lock);
+       free_ftrace_hash(hash);
  
        return count;
  }
@@@ -3062,8 -3075,13 +3070,13 @@@ static voi
  __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                                  void *data, int flags)
  {
+       struct ftrace_func_entry *rec_entry;
        struct ftrace_func_probe *entry;
 -      struct hlist_node *n, *tmp;
+       struct ftrace_func_probe *p;
+       struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+       struct list_head free_list;
+       struct ftrace_hash *hash;
 +      struct hlist_node *tmp;
        char str[KSYM_SYMBOL_LEN];
        int type = MATCH_FULL;
        int i, len = 0;
        }
  
        mutex_lock(&ftrace_lock);
+       hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+       if (!hash)
+               /* Hmm, should report this somehow */
+               goto out_unlock;
+       INIT_LIST_HEAD(&free_list);
        for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
                struct hlist_head *hhd = &ftrace_func_hash[i];
  
 -              hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
 +              hlist_for_each_entry_safe(entry, tmp, hhd, node) {
  
                        /* break up if statements for readability */
                        if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
                                        continue;
                        }
  
+                       rec_entry = ftrace_lookup_ip(hash, entry->ip);
+                       /* It is possible more than one entry had this ip */
+                       if (rec_entry)
+                               free_hash_entry(hash, rec_entry);
                        hlist_del_rcu(&entry->node);
-                       call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
+                       list_add(&entry->free_list, &free_list);
                }
        }
        __disable_ftrace_function_probe();
+       /*
+        * Remove after the disable is called. Otherwise, if the last
+        * probe is removed, a null hash means *all enabled*.
+        */
+       ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+       synchronize_sched();
+       list_for_each_entry_safe(entry, p, &free_list, free_list) {
+               list_del(&entry->free_list);
+               ftrace_free_entry(entry);
+       }
+               
+  out_unlock:
        mutex_unlock(&ftrace_lock);
+       free_ftrace_hash(hash);
  }
  
  void
@@@ -3570,7 -3614,7 +3609,7 @@@ static const struct file_operations ftr
        .open = ftrace_filter_open,
        .read = seq_read,
        .write = ftrace_filter_write,
 -      .llseek = ftrace_regex_lseek,
 +      .llseek = ftrace_filter_lseek,
        .release = ftrace_regex_release,
  };
  
@@@ -3578,7 -3622,7 +3617,7 @@@ static const struct file_operations ftr
        .open = ftrace_notrace_open,
        .read = seq_read,
        .write = ftrace_notrace_write,
 -      .llseek = ftrace_regex_lseek,
 +      .llseek = ftrace_filter_lseek,
        .release = ftrace_regex_release,
  };
  
@@@ -3736,7 -3780,8 +3775,8 @@@ out
        if (fail)
                return -EINVAL;
  
-       ftrace_graph_filter_enabled = 1;
+       ftrace_graph_filter_enabled = !!(*idx);
        return 0;
  }
  
@@@ -3783,8 -3828,8 +3823,8 @@@ static const struct file_operations ftr
        .open           = ftrace_graph_open,
        .read           = seq_read,
        .write          = ftrace_graph_write,
 +      .llseek         = ftrace_filter_lseek,
        .release        = ftrace_graph_release,
 -      .llseek         = seq_lseek,
  };
  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
  
@@@ -4130,8 -4175,7 +4170,8 @@@ ftrace_ops_control_func(unsigned long i
        preempt_disable_notrace();
        trace_recursion_set(TRACE_CONTROL_BIT);
        do_for_each_ftrace_op(op, ftrace_control_list) {
 -              if (!ftrace_function_local_disabled(op) &&
 +              if (!(op->flags & FTRACE_OPS_FL_STUB) &&
 +                  !ftrace_function_local_disabled(op) &&
                    ftrace_ops_test(op, ip))
                        op->func(ip, parent_ip, op, regs);
        } while_for_each_ftrace_op(op);
@@@ -4439,7 -4483,7 +4479,7 @@@ static const struct file_operations ftr
        .open           = ftrace_pid_open,
        .write          = ftrace_pid_write,
        .read           = seq_read,
 -      .llseek         = seq_lseek,
 +      .llseek         = ftrace_filter_lseek,
        .release        = ftrace_pid_release,
  };
  
@@@ -4555,8 -4599,12 +4595,8 @@@ ftrace_enable_sysctl(struct ctl_table *
                ftrace_startup_sysctl();
  
                /* we are starting ftrace again */
 -              if (ftrace_ops_list != &ftrace_list_end) {
 -                      if (ftrace_ops_list->next == &ftrace_list_end)
 -                              ftrace_trace_function = ftrace_ops_list->func;
 -                      else
 -                              ftrace_trace_function = ftrace_ops_list_func;
 -              }
 +              if (ftrace_ops_list != &ftrace_list_end)
 +                      update_ftrace_function();
  
        } else {
                /* stopping ftrace calls (just send to ftrace_stub) */
diff --combined kernel/trace/ring_buffer.c
index 6989df2ba1947bf58879ecf2d5cc4261c9fdfc2c,e5472f7bc347de782db2746233020a54ab43d9db..b59aea2c48c287f5de894efcba7d53c02fd6f279
@@@ -8,13 -8,16 +8,16 @@@
  #include <linux/trace_clock.h>
  #include <linux/trace_seq.h>
  #include <linux/spinlock.h>
+ #include <linux/irq_work.h>
  #include <linux/debugfs.h>
  #include <linux/uaccess.h>
  #include <linux/hardirq.h>
+ #include <linux/kthread.h>    /* for self test */
  #include <linux/kmemcheck.h>
  #include <linux/module.h>
  #include <linux/percpu.h>
  #include <linux/mutex.h>
+ #include <linux/delay.h>
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/hash.h>
@@@ -178,7 -181,7 +181,7 @@@ void tracing_off_permanent(void
  #define RB_MAX_SMALL_DATA     (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
  #define RB_EVNT_MIN_SIZE      8U      /* two 32bit words */
  
 -#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
  # define RB_FORCE_8BYTE_ALIGNMENT     0
  # define RB_ARCH_ALIGNMENT            RB_ALIGNMENT
  #else
  # define RB_ARCH_ALIGNMENT            8U
  #endif
  
 +#define RB_ALIGN_DATA         __aligned(RB_ARCH_ALIGNMENT)
 +
  /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
  #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
  
@@@ -336,7 -337,7 +339,7 @@@ EXPORT_SYMBOL_GPL(ring_buffer_event_dat
  struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
        local_t          commit;        /* write committed index */
 -      unsigned char    data[];        /* data of buffer page */
 +      unsigned char    data[] RB_ALIGN_DATA;  /* data of buffer page */
  };
  
  /*
@@@ -444,6 -445,12 +447,12 @@@ int ring_buffer_print_page_header(struc
        return ret;
  }
  
+ struct rb_irq_work {
+       struct irq_work                 work;
+       wait_queue_head_t               waiters;
+       bool                            waiters_pending;
+ };
  /*
   * head_page == tail_page && head == tail then buffer is empty.
   */
@@@ -478,6 -485,8 +487,8 @@@ struct ring_buffer_per_cpu 
        struct list_head                new_pages; /* new pages to add */
        struct work_struct              update_pages_work;
        struct completion               update_done;
+       struct rb_irq_work              irq_work;
  };
  
  struct ring_buffer {
        struct notifier_block           cpu_notify;
  #endif
        u64                             (*clock)(void);
+       struct rb_irq_work              irq_work;
  };
  
  struct ring_buffer_iter {
        u64                             read_stamp;
  };
  
+ /*
+  * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+  *
+  * Schedules a delayed work to wake up any task that is blocked on the
+  * ring buffer waiters queue.
+  */
+ static void rb_wake_up_waiters(struct irq_work *work)
+ {
+       struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+       wake_up_all(&rbwork->waiters);
+ }
+ /**
+  * ring_buffer_wait - wait for input to the ring buffer
+  * @buffer: buffer to wait on
+  * @cpu: the cpu buffer to wait on
+  *
+  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+  * as data is added to any of the @buffer's cpu buffers. Otherwise
+  * it will wait for data to be added to a specific cpu buffer.
+  */
+ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+ {
+       struct ring_buffer_per_cpu *cpu_buffer;
+       DEFINE_WAIT(wait);
+       struct rb_irq_work *work;
+       /*
+        * Depending on what the caller is waiting for, either any
+        * data in any cpu buffer, or a specific buffer, put the
+        * caller on the appropriate wait queue.
+        */
+       if (cpu == RING_BUFFER_ALL_CPUS)
+               work = &buffer->irq_work;
+       else {
+               cpu_buffer = buffer->buffers[cpu];
+               work = &cpu_buffer->irq_work;
+       }
+       prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+       /*
+        * The events can happen in critical sections where
+        * checking a work queue can cause deadlocks.
+        * After adding a task to the queue, this flag is set
+        * only to notify events to try to wake up the queue
+        * using irq_work.
+        *
+        * We don't clear it even if the buffer is no longer
+        * empty. The flag only causes the next event to run
+        * irq_work to do the work queue wake up. The worse
+        * that can happen if we race with !trace_empty() is that
+        * an event will cause an irq_work to try to wake up
+        * an empty queue.
+        *
+        * There's no reason to protect this flag either, as
+        * the work queue and irq_work logic will do the necessary
+        * synchronization for the wake ups. The only thing
+        * that is necessary is that the wake up happens after
+        * a task has been queued. It's OK for spurious wake ups.
+        */
+       work->waiters_pending = true;
+       if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
+           (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
+               schedule();
+       finish_wait(&work->waiters, &wait);
+ }
+ /**
+  * ring_buffer_poll_wait - poll on buffer input
+  * @buffer: buffer to wait on
+  * @cpu: the cpu buffer to wait on
+  * @filp: the file descriptor
+  * @poll_table: The poll descriptor
+  *
+  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+  * as data is added to any of the @buffer's cpu buffers. Otherwise
+  * it will wait for data to be added to a specific cpu buffer.
+  *
+  * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+  * zero otherwise.
+  */
+ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+                         struct file *filp, poll_table *poll_table)
+ {
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct rb_irq_work *work;
+       if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+           (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+               return POLLIN | POLLRDNORM;
+       if (cpu == RING_BUFFER_ALL_CPUS)
+               work = &buffer->irq_work;
+       else {
+               cpu_buffer = buffer->buffers[cpu];
+               work = &cpu_buffer->irq_work;
+       }
+       work->waiters_pending = true;
+       poll_wait(filp, &work->waiters, poll_table);
+       if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+           (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+               return POLLIN | POLLRDNORM;
+       return 0;
+ }
  /* buffer may be either ring_buffer or ring_buffer_per_cpu */
  #define RB_WARN_ON(b, cond)                                           \
        ({                                                              \
@@@ -1063,6 -1186,8 +1188,8 @@@ rb_allocate_cpu_buffer(struct ring_buff
        cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
        init_completion(&cpu_buffer->update_done);
+       init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
+       init_waitqueue_head(&cpu_buffer->irq_work.waiters);
  
        bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                            GFP_KERNEL, cpu_to_node(cpu));
@@@ -1158,6 -1283,9 +1285,9 @@@ struct ring_buffer *__ring_buffer_alloc
        buffer->clock = trace_clock_local;
        buffer->reader_lock_key = key;
  
+       init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
+       init_waitqueue_head(&buffer->irq_work.waiters);
        /* need at least two pages */
        if (nr_pages < 2)
                nr_pages = 2;
@@@ -1553,11 -1681,22 +1683,22 @@@ int ring_buffer_resize(struct ring_buff
                        if (!cpu_buffer->nr_pages_to_update)
                                continue;
  
-                       if (cpu_online(cpu))
+                       /* The update must run on the CPU that is being updated. */
+                       preempt_disable();
+                       if (cpu == smp_processor_id() || !cpu_online(cpu)) {
+                               rb_update_pages(cpu_buffer);
+                               cpu_buffer->nr_pages_to_update = 0;
+                       } else {
+                               /*
+                                * Can not disable preemption for schedule_work_on()
+                                * on PREEMPT_RT.
+                                */
+                               preempt_enable();
                                schedule_work_on(cpu,
                                                &cpu_buffer->update_pages_work);
-                       else
-                               rb_update_pages(cpu_buffer);
+                               preempt_disable();
+                       }
+                       preempt_enable();
                }
  
                /* wait for all the updates to complete */
  
                get_online_cpus();
  
-               if (cpu_online(cpu_id)) {
+               preempt_disable();
+               /* The update must run on the CPU that is being updated. */
+               if (cpu_id == smp_processor_id() || !cpu_online(cpu_id))
+                       rb_update_pages(cpu_buffer);
+               else {
+                       /*
+                        * Can not disable preemption for schedule_work_on()
+                        * on PREEMPT_RT.
+                        */
+                       preempt_enable();
                        schedule_work_on(cpu_id,
                                         &cpu_buffer->update_pages_work);
                        wait_for_completion(&cpu_buffer->update_done);
-               } else
-                       rb_update_pages(cpu_buffer);
+                       preempt_disable();
+               }
+               preempt_enable();
  
                cpu_buffer->nr_pages_to_update = 0;
                put_online_cpus();
@@@ -2612,6 -2761,22 +2763,22 @@@ static void rb_commit(struct ring_buffe
        rb_end_commit(cpu_buffer);
  }
  
+ static __always_inline void
+ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+ {
+       if (buffer->irq_work.waiters_pending) {
+               buffer->irq_work.waiters_pending = false;
+               /* irq_work_queue() supplies it's own memory barriers */
+               irq_work_queue(&buffer->irq_work.work);
+       }
+       if (cpu_buffer->irq_work.waiters_pending) {
+               cpu_buffer->irq_work.waiters_pending = false;
+               /* irq_work_queue() supplies it's own memory barriers */
+               irq_work_queue(&cpu_buffer->irq_work.work);
+       }
+ }
  /**
   * ring_buffer_unlock_commit - commit a reserved
   * @buffer: The buffer to commit to
@@@ -2631,6 -2796,8 +2798,8 @@@ int ring_buffer_unlock_commit(struct ri
  
        rb_commit(cpu_buffer, event);
  
+       rb_wakeups(buffer, cpu_buffer);
        trace_recursive_unlock();
  
        preempt_enable_notrace();
@@@ -2803,6 -2970,8 +2972,8 @@@ int ring_buffer_write(struct ring_buffe
  
        rb_commit(cpu_buffer, event);
  
+       rb_wakeups(buffer, cpu_buffer);
        ret = 0;
   out:
        preempt_enable_notrace();
@@@ -4467,3 -4636,320 +4638,320 @@@ static int rb_cpu_notify(struct notifie
        return NOTIFY_OK;
  }
  #endif
+ #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
+ /*
+  * This is a basic integrity check of the ring buffer.
+  * Late in the boot cycle this test will run when configured in.
+  * It will kick off a thread per CPU that will go into a loop
+  * writing to the per cpu ring buffer various sizes of data.
+  * Some of the data will be large items, some small.
+  *
+  * Another thread is created that goes into a spin, sending out
+  * IPIs to the other CPUs to also write into the ring buffer.
+  * this is to test the nesting ability of the buffer.
+  *
+  * Basic stats are recorded and reported. If something in the
+  * ring buffer should happen that's not expected, a big warning
+  * is displayed and all ring buffers are disabled.
+  */
+ static struct task_struct *rb_threads[NR_CPUS] __initdata;
+ struct rb_test_data {
+       struct ring_buffer      *buffer;
+       unsigned long           events;
+       unsigned long           bytes_written;
+       unsigned long           bytes_alloc;
+       unsigned long           bytes_dropped;
+       unsigned long           events_nested;
+       unsigned long           bytes_written_nested;
+       unsigned long           bytes_alloc_nested;
+       unsigned long           bytes_dropped_nested;
+       int                     min_size_nested;
+       int                     max_size_nested;
+       int                     max_size;
+       int                     min_size;
+       int                     cpu;
+       int                     cnt;
+ };
+ static struct rb_test_data rb_data[NR_CPUS] __initdata;
+ /* 1 meg per cpu */
+ #define RB_TEST_BUFFER_SIZE   1048576
+ static char rb_string[] __initdata =
+       "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
+       "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
+       "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
+ static bool rb_test_started __initdata;
+ struct rb_item {
+       int size;
+       char str[];
+ };
+ static __init int rb_write_something(struct rb_test_data *data, bool nested)
+ {
+       struct ring_buffer_event *event;
+       struct rb_item *item;
+       bool started;
+       int event_len;
+       int size;
+       int len;
+       int cnt;
+       /* Have nested writes different that what is written */
+       cnt = data->cnt + (nested ? 27 : 0);
+       /* Multiply cnt by ~e, to make some unique increment */
+       size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1);
+       len = size + sizeof(struct rb_item);
+       started = rb_test_started;
+       /* read rb_test_started before checking buffer enabled */
+       smp_rmb();
+       event = ring_buffer_lock_reserve(data->buffer, len);
+       if (!event) {
+               /* Ignore dropped events before test starts. */
+               if (started) {
+                       if (nested)
+                               data->bytes_dropped_nested += len;
+                       else
+                               data->bytes_dropped += len;
+               }
+               return len;
+       }
+       event_len = ring_buffer_event_length(event);
+       if (RB_WARN_ON(data->buffer, event_len < len))
+               goto out;
+       item = ring_buffer_event_data(event);
+       item->size = size;
+       memcpy(item->str, rb_string, size);
+       if (nested) {
+               data->bytes_alloc_nested += event_len;
+               data->bytes_written_nested += len;
+               data->events_nested++;
+               if (!data->min_size_nested || len < data->min_size_nested)
+                       data->min_size_nested = len;
+               if (len > data->max_size_nested)
+                       data->max_size_nested = len;
+       } else {
+               data->bytes_alloc += event_len;
+               data->bytes_written += len;
+               data->events++;
+               if (!data->min_size || len < data->min_size)
+                       data->min_size = len;
+               if (len > data->max_size)
+                       data->max_size = len;
+       }
+  out:
+       ring_buffer_unlock_commit(data->buffer, event);
+       return 0;
+ }
+ static __init int rb_test(void *arg)
+ {
+       struct rb_test_data *data = arg;
+       while (!kthread_should_stop()) {
+               rb_write_something(data, false);
+               data->cnt++;
+               set_current_state(TASK_INTERRUPTIBLE);
+               /* Now sleep between a min of 100-300us and a max of 1ms */
+               usleep_range(((data->cnt % 3) + 1) * 100, 1000);
+       }
+       return 0;
+ }
+ static __init void rb_ipi(void *ignore)
+ {
+       struct rb_test_data *data;
+       int cpu = smp_processor_id();
+       data = &rb_data[cpu];
+       rb_write_something(data, true);
+ }
+ static __init int rb_hammer_test(void *arg)
+ {
+       while (!kthread_should_stop()) {
+               /* Send an IPI to all cpus to write data! */
+               smp_call_function(rb_ipi, NULL, 1);
+               /* No sleep, but for non preempt, let others run */
+               schedule();
+       }
+       return 0;
+ }
+ static __init int test_ringbuffer(void)
+ {
+       struct task_struct *rb_hammer;
+       struct ring_buffer *buffer;
+       int cpu;
+       int ret = 0;
+       pr_info("Running ring buffer tests...\n");
+       buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
+       if (WARN_ON(!buffer))
+               return 0;
+       /* Disable buffer so that threads can't write to it yet */
+       ring_buffer_record_off(buffer);
+       for_each_online_cpu(cpu) {
+               rb_data[cpu].buffer = buffer;
+               rb_data[cpu].cpu = cpu;
+               rb_data[cpu].cnt = cpu;
+               rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
+                                                "rbtester/%d", cpu);
+               if (WARN_ON(!rb_threads[cpu])) {
+                       pr_cont("FAILED\n");
+                       ret = -1;
+                       goto out_free;
+               }
+               kthread_bind(rb_threads[cpu], cpu);
+               wake_up_process(rb_threads[cpu]);
+       }
+       /* Now create the rb hammer! */
+       rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
+       if (WARN_ON(!rb_hammer)) {
+               pr_cont("FAILED\n");
+               ret = -1;
+               goto out_free;
+       }
+       ring_buffer_record_on(buffer);
+       /*
+        * Show buffer is enabled before setting rb_test_started.
+        * Yes there's a small race window where events could be
+        * dropped and the thread won't catch it. But when a ring
+        * buffer gets enabled, there will always be some kind of
+        * delay before other CPUs see it. Thus, we don't care about
+        * those dropped events. We care about events dropped after
+        * the threads see that the buffer is active.
+        */
+       smp_wmb();
+       rb_test_started = true;
+       set_current_state(TASK_INTERRUPTIBLE);
+       /* Just run for 10 seconds */;
+       schedule_timeout(10 * HZ);
+       kthread_stop(rb_hammer);
+  out_free:
+       for_each_online_cpu(cpu) {
+               if (!rb_threads[cpu])
+                       break;
+               kthread_stop(rb_threads[cpu]);
+       }
+       if (ret) {
+               ring_buffer_free(buffer);
+               return ret;
+       }
+       /* Report! */
+       pr_info("finished\n");
+       for_each_online_cpu(cpu) {
+               struct ring_buffer_event *event;
+               struct rb_test_data *data = &rb_data[cpu];
+               struct rb_item *item;
+               unsigned long total_events;
+               unsigned long total_dropped;
+               unsigned long total_written;
+               unsigned long total_alloc;
+               unsigned long total_read = 0;
+               unsigned long total_size = 0;
+               unsigned long total_len = 0;
+               unsigned long total_lost = 0;
+               unsigned long lost;
+               int big_event_size;
+               int small_event_size;
+               ret = -1;
+               total_events = data->events + data->events_nested;
+               total_written = data->bytes_written + data->bytes_written_nested;
+               total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
+               total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
+               big_event_size = data->max_size + data->max_size_nested;
+               small_event_size = data->min_size + data->min_size_nested;
+               pr_info("CPU %d:\n", cpu);
+               pr_info("              events:    %ld\n", total_events);
+               pr_info("       dropped bytes:    %ld\n", total_dropped);
+               pr_info("       alloced bytes:    %ld\n", total_alloc);
+               pr_info("       written bytes:    %ld\n", total_written);
+               pr_info("       biggest event:    %d\n", big_event_size);
+               pr_info("      smallest event:    %d\n", small_event_size);
+               if (RB_WARN_ON(buffer, total_dropped))
+                       break;
+               ret = 0;
+               while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
+                       total_lost += lost;
+                       item = ring_buffer_event_data(event);
+                       total_len += ring_buffer_event_length(event);
+                       total_size += item->size + sizeof(struct rb_item);
+                       if (memcmp(&item->str[0], rb_string, item->size) != 0) {
+                               pr_info("FAILED!\n");
+                               pr_info("buffer had: %.*s\n", item->size, item->str);
+                               pr_info("expected:   %.*s\n", item->size, rb_string);
+                               RB_WARN_ON(buffer, 1);
+                               ret = -1;
+                               break;
+                       }
+                       total_read++;
+               }
+               if (ret)
+                       break;
+               ret = -1;
+               pr_info("         read events:   %ld\n", total_read);
+               pr_info("         lost events:   %ld\n", total_lost);
+               pr_info("        total events:   %ld\n", total_lost + total_read);
+               pr_info("  recorded len bytes:   %ld\n", total_len);
+               pr_info(" recorded size bytes:   %ld\n", total_size);
+               if (total_lost)
+                       pr_info(" With dropped events, record len and size may not match\n"
+                               " alloced and written from above\n");
+               if (!total_lost) {
+                       if (RB_WARN_ON(buffer, total_len != total_alloc ||
+                                      total_size != total_written))
+                               break;
+               }
+               if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
+                       break;
+               ret = 0;
+       }
+       if (!ret)
+               pr_info("Ring buffer PASSED!\n");
+       ring_buffer_free(buffer);
+       return 0;
+ }
+ late_initcall(test_ringbuffer);
+ #endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
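Editor's illustration (not part of the merge): the hunks at the top of this file hook rb_wakeups() into the two commit paths, and the startup test above exercises exactly that reserve/commit producer API plus the consuming reader. Every call used in the sketch below appears verbatim in the hunks above; the buffer size and message are arbitrary.

/* Minimal kernel-side round trip over the ring buffer API -- an
 * illustrative sketch only, mirroring rb_write_something() and the
 * report loop of test_ringbuffer() above. */
#include <linux/ring_buffer.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/errno.h>

static int example_rb_roundtrip(void)
{
        struct ring_buffer *buffer;
        struct ring_buffer_event *event;
        static const char msg[] = "hello ring buffer";
        int cpu;

        buffer = ring_buffer_alloc(4096, RB_FL_OVERWRITE);
        if (!buffer)
                return -ENOMEM;

        /* Reserve space, fill it, then commit -- the commit is where
         * ring_buffer_unlock_commit() now calls rb_wakeups(). */
        event = ring_buffer_lock_reserve(buffer, sizeof(msg));
        if (event) {
                memcpy(ring_buffer_event_data(event), msg, sizeof(msg));
                ring_buffer_unlock_commit(buffer, event);
        }

        /* The write lands on whichever CPU did the reserve (preemption
         * is disabled across reserve/commit), so scan all online CPUs
         * when consuming, just as the startup test's report loop does. */
        for_each_online_cpu(cpu) {
                event = ring_buffer_consume(buffer, cpu, NULL, NULL);
                if (event) {
                        pr_info("read back: %s\n",
                                (char *)ring_buffer_event_data(event));
                        break;
                }
        }

        ring_buffer_free(buffer);
        return 0;
}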
diff --combined kernel/trace/trace.c
index 66338c4f7f4bdb6142c2b21a99f14292c708d050,72970793b40a09b2e44539dae8c80f51a8b7fa20..581630a6387d864ee6afd8ff77a78d69a7439ec4
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * ring buffer based function tracer
   *
-  * Copyright (C) 2007-2008 Steven Rostedt <[email protected]>
+  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
   * Copyright (C) 2008 Ingo Molnar <[email protected]>
   *
   * Originally taken from the RT patch by:
@@@ -19,7 -19,6 +19,6 @@@
  #include <linux/seq_file.h>
  #include <linux/notifier.h>
  #include <linux/irqflags.h>
- #include <linux/irq_work.h>
  #include <linux/debugfs.h>
  #include <linux/pagemap.h>
  #include <linux/hardirq.h>
@@@ -48,7 -47,7 +47,7 @@@
   * On boot up, the ring buffer is set to the minimum size, so that
   * we do not waste memory on systems that are not using tracing.
   */
- int ring_buffer_expanded;
+ bool ring_buffer_expanded;
  
  /*
   * We need to change this state when a selftest is running.
@@@ -86,14 -85,6 +85,6 @@@ static int dummy_set_flag(u32 old_flags
   */
  static DEFINE_PER_CPU(bool, trace_cmdline_save);
  
- /*
-  * When a reader is waiting for data, then this variable is
-  * set to true.
-  */
- static bool trace_wakeup_needed;
- static struct irq_work trace_work_wakeup;
  /*
   * Kill all tracing for good (never come back).
   * It is initialized to 1 but will turn to zero if the initialization
@@@ -130,12 -121,14 +121,14 @@@ static int tracing_set_tracer(const cha
  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
  static char *default_bootup_tracer;
  
+ static bool allocate_snapshot;
  static int __init set_cmdline_ftrace(char *str)
  {
        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
        /* We are using ftrace early, expand it */
-       ring_buffer_expanded = 1;
+       ring_buffer_expanded = true;
        return 1;
  }
  __setup("ftrace=", set_cmdline_ftrace);
@@@ -156,6 -149,15 +149,15 @@@ static int __init set_ftrace_dump_on_oo
  }
  __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
  
+ static int __init boot_alloc_snapshot(char *str)
+ {
+       allocate_snapshot = true;
+       /* We also need the main ring buffer expanded */
+       ring_buffer_expanded = true;
+       return 1;
+ }
+ __setup("alloc_snapshot", boot_alloc_snapshot);
  
  static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
  static char *trace_boot_options __initdata;
@@@ -189,7 -191,7 +191,7 @@@ unsigned long long ns2usecs(cycle_t nse
   */
  static struct trace_array     global_trace;
  
- static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
+ LIST_HEAD(ftrace_trace_arrays);
  
  int filter_current_check_discard(struct ring_buffer *buffer,
                                 struct ftrace_event_call *call, void *rec,
@@@ -204,29 -206,15 +206,15 @@@ cycle_t ftrace_now(int cpu
        u64 ts;
  
        /* Early boot up does not have a buffer yet */
-       if (!global_trace.buffer)
+       if (!global_trace.trace_buffer.buffer)
                return trace_clock_local();
  
-       ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
-       ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
+       ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
+       ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
  
        return ts;
  }
  
- /*
-  * The max_tr is used to snapshot the global_trace when a maximum
-  * latency is reached. Some tracers will use this to store a maximum
-  * trace while it continues examining live traces.
-  *
-  * The buffers for the max_tr are set up the same as the global_trace.
-  * When a snapshot is taken, the link list of the max_tr is swapped
-  * with the link list of the global_trace and the buffers are reset for
-  * the global_trace so the tracing can continue.
-  */
- static struct trace_array     max_tr;
- static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
  int tracing_is_enabled(void)
  {
        return tracing_is_on();
@@@ -249,9 -237,6 +237,6 @@@ static unsigned long               trace_buf_size = 
  /* trace_types holds a link list of available tracers. */
  static struct tracer          *trace_types __read_mostly;
  
- /* current_trace points to the tracer that is currently active */
- static struct tracer          *current_trace __read_mostly = &nop_trace;
  /*
   * trace_types_lock is used to protect the trace_types list.
   */
@@@ -285,13 -270,13 +270,13 @@@ static DEFINE_PER_CPU(struct mutex, cpu
  
  static inline void trace_access_lock(int cpu)
  {
-       if (cpu == TRACE_PIPE_ALL_CPU) {
+       if (cpu == RING_BUFFER_ALL_CPUS) {
                /* gain it for accessing the whole ring buffer. */
                down_write(&all_cpu_access_lock);
        } else {
                /* gain it for accessing a cpu ring buffer. */
  
-               /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+               /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
                down_read(&all_cpu_access_lock);
  
                /* Secondly block other access to this @cpu ring buffer. */
  
  static inline void trace_access_unlock(int cpu)
  {
-       if (cpu == TRACE_PIPE_ALL_CPU) {
+       if (cpu == RING_BUFFER_ALL_CPUS) {
                up_write(&all_cpu_access_lock);
        } else {
                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
@@@ -339,30 -324,11 +324,11 @@@ static inline void trace_access_lock_in
  
  #endif
  
- /* trace_wait is a waitqueue for tasks blocked on trace_poll */
- static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
  /* trace_flags holds trace_options default values */
  unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
        TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
-       TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
- static int trace_stop_count;
- static DEFINE_RAW_SPINLOCK(tracing_start_lock);
- /**
-  * trace_wake_up - wake up tasks waiting for trace input
-  *
-  * Schedules a delayed work to wake up any task that is blocked on the
-  * trace_wait queue. These is used with trace_poll for tasks polling the
-  * trace.
-  */
- static void trace_wake_up(struct irq_work *work)
- {
-       wake_up_all(&trace_wait);
- }
+       TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
  
  /**
   * tracing_on - enable tracing buffers
   */
  void tracing_on(void)
  {
-       if (global_trace.buffer)
-               ring_buffer_record_on(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               ring_buffer_record_on(global_trace.trace_buffer.buffer);
        /*
         * This flag is only looked at when buffers haven't been
         * allocated yet. We don't really care about the race
  }
  EXPORT_SYMBOL_GPL(tracing_on);
  
+ /**
+  * __trace_puts - write a constant string into the trace buffer.
+  * @ip:          The address of the caller
+  * @str:   The constant string to write
+  * @size:  The size of the string.
+  */
+ int __trace_puts(unsigned long ip, const char *str, int size)
+ {
+       struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
+       struct print_entry *entry;
+       unsigned long irq_flags;
+       int alloc;
+       alloc = sizeof(*entry) + size + 2; /* possible \n added */
+       local_save_flags(irq_flags);
+       buffer = global_trace.trace_buffer.buffer;
+       event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
+                                         irq_flags, preempt_count());
+       if (!event)
+               return 0;
+       entry = ring_buffer_event_data(event);
+       entry->ip = ip;
+       memcpy(&entry->buf, str, size);
+       /* Add a newline if necessary */
+       if (entry->buf[size - 1] != '\n') {
+               entry->buf[size] = '\n';
+               entry->buf[size + 1] = '\0';
+       } else
+               entry->buf[size] = '\0';
+       __buffer_unlock_commit(buffer, event);
+       return size;
+ }
+ EXPORT_SYMBOL_GPL(__trace_puts);
+ /**
+  * __trace_bputs - write the pointer to a constant string into trace buffer
+  * @ip:          The address of the caller
+  * @str:   The constant string to write to the buffer
+  */
+ int __trace_bputs(unsigned long ip, const char *str)
+ {
+       struct ring_buffer_event *event;
+       struct ring_buffer *buffer;
+       struct bputs_entry *entry;
+       unsigned long irq_flags;
+       int size = sizeof(struct bputs_entry);
+       local_save_flags(irq_flags);
+       buffer = global_trace.trace_buffer.buffer;
+       event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
+                                         irq_flags, preempt_count());
+       if (!event)
+               return 0;
+       entry = ring_buffer_event_data(event);
+       entry->ip                       = ip;
+       entry->str                      = str;
+       __buffer_unlock_commit(buffer, event);
+       return 1;
+ }
+ EXPORT_SYMBOL_GPL(__trace_bputs);
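Editor's note: __trace_puts() and __trace_bputs() are the backends of the trace_puts() fast path introduced by this series (the bputs variant records only the pointer to a constant string, as the bputs_entry above shows). A hedged usage sketch, assuming the trace_puts() wrapper macro this series adds to <linux/kernel.h>:

/* Illustrative only -- not part of the diff. trace_puts() picks
 * __trace_bputs() for string literals and __trace_puts() otherwise. */
#include <linux/kernel.h>

static void example_fast_trace_marker(int err)
{
        /* Constant string: only the pointer is recorded. */
        trace_puts("entering slow path\n");

        /* For formatted output, trace_printk() is still the tool. */
        if (err)
                trace_printk("slow path failed: %d\n", err);
}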
+ #ifdef CONFIG_TRACER_SNAPSHOT
+ /**
+  * tracing_snapshot - take a snapshot of the current buffer.
+  *
+  * This causes a swap between the snapshot buffer and the current live
+  * tracing buffer. You can use this to take snapshots of the live
+  * trace when some condition is triggered, but continue to trace.
+  *
+  * Note, make sure to allocate the snapshot with either
+  * a tracing_snapshot_alloc(), or by doing it manually
+  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
+  *
+  * If the snapshot buffer is not allocated, it will stop tracing.
+  * Basically making a permanent snapshot.
+  */
+ void tracing_snapshot(void)
+ {
+       struct trace_array *tr = &global_trace;
+       struct tracer *tracer = tr->current_trace;
+       unsigned long flags;
+       if (in_nmi()) {
+               internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
+               internal_trace_puts("*** snapshot is being ignored        ***\n");
+               return;
+       }
+       if (!tr->allocated_snapshot) {
+               internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
+               internal_trace_puts("*** stopping trace here!   ***\n");
+               tracing_off();
+               return;
+       }
+       /* Note, snapshot can not be used when the tracer uses it */
+       if (tracer->use_max_tr) {
+               internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
+               internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
+               return;
+       }
+       local_irq_save(flags);
+       update_max_tr(tr, current, smp_processor_id());
+       local_irq_restore(flags);
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot);
+ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
+                                       struct trace_buffer *size_buf, int cpu_id);
+ static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
+ static int alloc_snapshot(struct trace_array *tr)
+ {
+       int ret;
+       if (!tr->allocated_snapshot) {
+               /* allocate spare buffer */
+               ret = resize_buffer_duplicate_size(&tr->max_buffer,
+                                  &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
+               if (ret < 0)
+                       return ret;
+               tr->allocated_snapshot = true;
+       }
+       return 0;
+ }
+ void free_snapshot(struct trace_array *tr)
+ {
+       /*
+        * We don't free the ring buffer; instead, we resize it because
+        * the max_tr ring buffer has some state (e.g. ring->clock) and
+        * we want to preserve it.
+        */
+       ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
+       set_buffer_entries(&tr->max_buffer, 1);
+       tracing_reset_online_cpus(&tr->max_buffer);
+       tr->allocated_snapshot = false;
+ }
+ /**
+  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
+  *
+  * This is similar to tracing_snapshot(), but it will allocate the
+  * snapshot buffer if it isn't already allocated. Use this only
+  * where it is safe to sleep, as the allocation may sleep.
+  *
+  * This causes a swap between the snapshot buffer and the current live
+  * tracing buffer. You can use this to take snapshots of the live
+  * trace when some condition is triggered, but continue to trace.
+  */
+ void tracing_snapshot_alloc(void)
+ {
+       struct trace_array *tr = &global_trace;
+       int ret;
+       ret = alloc_snapshot(tr);
+       if (WARN_ON(ret < 0))
+               return;
+       tracing_snapshot();
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+ #else
+ void tracing_snapshot(void)
+ {
+       WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot);
+ void tracing_snapshot_alloc(void)
+ {
+       /* Give warning */
+       tracing_snapshot();
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+ #endif /* CONFIG_TRACER_SNAPSHOT */
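Editor's note: tracing_snapshot() and tracing_snapshot_alloc() give kernel code the same swap-to-snapshot behaviour that userspace gets via "echo 1 > /sys/kernel/debug/tracing/snapshot" (or the alloc_snapshot boot parameter added earlier in this file). A minimal, hedged sketch of triggering a snapshot from a driver error path; the condition, flag and message are invented, and the declarations are assumed to come from <linux/kernel.h> as added by this series:

/* Illustrative sketch only -- not part of the merge. */
#include <linux/kernel.h>

static bool example_snapshot_armed;     /* hypothetical driver state */

static void example_handle_timeout(void)
{
        /*
         * Allocate the spare buffer once, from a context that may sleep.
         * tracing_snapshot() itself only swaps buffer pointers, so it is
         * usable from most contexts afterwards -- though not NMI, which
         * the code above explicitly rejects.
         */
        if (!example_snapshot_armed) {
                tracing_snapshot_alloc();
                example_snapshot_armed = true;
        }

        trace_printk("device timeout -- freezing trace in snapshot\n");
        tracing_snapshot();
}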
  /**
   * tracing_off - turn off tracing buffers
   *
   */
  void tracing_off(void)
  {
-       if (global_trace.buffer)
-               ring_buffer_record_off(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               ring_buffer_record_off(global_trace.trace_buffer.buffer);
        /*
         * This flag is only looked at when buffers haven't been
         * allocated yet. We don't really care about the race
@@@ -411,8 -567,8 +567,8 @@@ EXPORT_SYMBOL_GPL(tracing_off)
   */
  int tracing_is_on(void)
  {
-       if (global_trace.buffer)
-               return ring_buffer_record_is_on(global_trace.buffer);
+       if (global_trace.trace_buffer.buffer)
+               return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
        return !global_trace.buffer_disabled;
  }
  EXPORT_SYMBOL_GPL(tracing_is_on);
@@@ -479,6 -635,7 +635,7 @@@ static const char *trace_options[] = 
        "disable_on_free",
        "irq-info",
        "markers",
+       "function-trace",
        NULL
  };
  
@@@ -490,6 -647,8 +647,8 @@@ static struct 
        { trace_clock_local,    "local",        1 },
        { trace_clock_global,   "global",       1 },
        { trace_clock_counter,  "counter",      0 },
+       { trace_clock_jiffies,  "uptime",       1 },
+       { trace_clock,          "perf",         1 },
        ARCH_TRACE_CLOCKS
  };
  
@@@ -670,13 -829,14 +829,14 @@@ unsigned long __read_mostly     tracing_max
  static void
  __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
  {
-       struct trace_array_cpu *data = tr->data[cpu];
-       struct trace_array_cpu *max_data;
+       struct trace_buffer *trace_buf = &tr->trace_buffer;
+       struct trace_buffer *max_buf = &tr->max_buffer;
+       struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
+       struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
  
-       max_tr.cpu = cpu;
-       max_tr.time_start = data->preempt_timestamp;
+       max_buf->cpu = cpu;
+       max_buf->time_start = data->preempt_timestamp;
  
-       max_data = max_tr.data[cpu];
        max_data->saved_latency = tracing_max_latency;
        max_data->critical_start = data->critical_start;
        max_data->critical_end = data->critical_end;
@@@ -706,22 -866,22 +866,22 @@@ update_max_tr(struct trace_array *tr, s
  {
        struct ring_buffer *buf;
  
-       if (trace_stop_count)
+       if (tr->stop_count)
                return;
  
        WARN_ON_ONCE(!irqs_disabled());
  
-       if (!current_trace->allocated_snapshot) {
+       if (!tr->allocated_snapshot) {
                /* Only the nop tracer should hit this when disabling */
-               WARN_ON_ONCE(current_trace != &nop_trace);
+               WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
        }
  
        arch_spin_lock(&ftrace_max_lock);
  
-       buf = tr->buffer;
-       tr->buffer = max_tr.buffer;
-       max_tr.buffer = buf;
+       buf = tr->trace_buffer.buffer;
+       tr->trace_buffer.buffer = tr->max_buffer.buffer;
+       tr->max_buffer.buffer = buf;
  
        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&ftrace_max_lock);
@@@ -740,19 -900,16 +900,19 @@@ update_max_tr_single(struct trace_arra
  {
        int ret;
  
-       if (trace_stop_count)
+       if (tr->stop_count)
                return;
  
        WARN_ON_ONCE(!irqs_disabled());
-       if (!current_trace->allocated_snapshot) {
 -      if (WARN_ON_ONCE(!tr->allocated_snapshot))
++      if (!tr->allocated_snapshot) {
 +              /* Only the nop tracer should hit this when disabling */
-               WARN_ON_ONCE(current_trace != &nop_trace);
++              WARN_ON_ONCE(tr->current_trace != &nop_trace);
                return;
 +      }
  
        arch_spin_lock(&ftrace_max_lock);
  
-       ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+       ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
  
        if (ret == -EBUSY) {
                /*
                 * the max trace buffer (no one writes directly to it)
                 * and flag that it failed.
                 */
-               trace_array_printk(&max_tr, _THIS_IP_,
+               trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
                        "Failed to swap buffers due to commit in progress\n");
        }
  
  
  static void default_wait_pipe(struct trace_iterator *iter)
  {
-       DEFINE_WAIT(wait);
+       /* Iterators are static, they should be filled or empty */
+       if (trace_buffer_iter(iter, iter->cpu_file))
+               return;
+       ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+ }
+ #ifdef CONFIG_FTRACE_STARTUP_TEST
+ static int run_tracer_selftest(struct tracer *type)
+ {
+       struct trace_array *tr = &global_trace;
+       struct tracer *saved_tracer = tr->current_trace;
+       int ret;
  
-       prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
+       if (!type->selftest || tracing_selftest_disabled)
+               return 0;
  
        /*
-        * The events can happen in critical sections where
-        * checking a work queue can cause deadlocks.
-        * After adding a task to the queue, this flag is set
-        * only to notify events to try to wake up the queue
-        * using irq_work.
-        *
-        * We don't clear it even if the buffer is no longer
-        * empty. The flag only causes the next event to run
-        * irq_work to do the work queue wake up. The worse
-        * that can happen if we race with !trace_empty() is that
-        * an event will cause an irq_work to try to wake up
-        * an empty queue.
-        *
-        * There's no reason to protect this flag either, as
-        * the work queue and irq_work logic will do the necessary
-        * synchronization for the wake ups. The only thing
-        * that is necessary is that the wake up happens after
-        * a task has been queued. It's OK for spurious wake ups.
+        * Run a selftest on this tracer.
+        * Here we reset the trace buffer, and set the current
+        * tracer to be this tracer. The tracer can then run some
+        * internal tracing to verify that everything is in order.
+        * If we fail, we do not register this tracer.
         */
-       trace_wakeup_needed = true;
+       tracing_reset_online_cpus(&tr->trace_buffer);
  
-       if (trace_empty(iter))
-               schedule();
+       tr->current_trace = type;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (type->use_max_tr) {
+               /* If we expanded the buffers, make sure the max is expanded too */
+               if (ring_buffer_expanded)
+                       ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
+                                          RING_BUFFER_ALL_CPUS);
+               tr->allocated_snapshot = true;
+       }
+ #endif
+       /* the test is responsible for initializing and enabling */
+       pr_info("Testing tracer %s: ", type->name);
+       ret = type->selftest(type, tr);
+       /* the test is responsible for resetting too */
+       tr->current_trace = saved_tracer;
+       if (ret) {
+               printk(KERN_CONT "FAILED!\n");
+               /* Add the warning after printing 'FAILED' */
+               WARN_ON(1);
+               return -1;
+       }
+       /* Only reset on passing, to avoid touching corrupted buffers */
+       tracing_reset_online_cpus(&tr->trace_buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (type->use_max_tr) {
+               tr->allocated_snapshot = false;
  
-       finish_wait(&trace_wait, &wait);
+               /* Shrink the max buffer again */
+               if (ring_buffer_expanded)
+                       ring_buffer_resize(tr->max_buffer.buffer, 1,
+                                          RING_BUFFER_ALL_CPUS);
+       }
+ #endif
+       printk(KERN_CONT "PASSED\n");
+       return 0;
+ }
+ #else
+ static inline int run_tracer_selftest(struct tracer *type)
+ {
+       return 0;
  }
+ #endif /* CONFIG_FTRACE_STARTUP_TEST */
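Editor's note: run_tracer_selftest() factors out the self-test that register_tracer() previously ran inline (the removed copy appears a few hunks below). For orientation, a hedged sketch of a tracer that would go through this path; the .name, .init, .reset and .selftest fields follow the calls visible in this file (type->name, type->selftest(type, tr)), while the rest of struct tracer's layout is assumed, and the sketch would live in kernel/trace/ so it can include the private trace.h:

/* Illustrative only -- not from the diff. */
#include <linux/init.h>
#include "trace.h"

static int example_tracer_start(struct trace_array *tr)
{
        return 0;       /* nothing to set up in this sketch */
}

static void example_tracer_reset(struct trace_array *tr)
{
}

#ifdef CONFIG_FTRACE_STARTUP_TEST
static int example_selftest(struct tracer *trace, struct trace_array *tr)
{
        /*
         * run_tracer_selftest() has already reset the buffer and made
         * this the current tracer; generate events, verify them, and
         * return 0 for PASSED (nonzero prints FAILED and blocks
         * registration).
         */
        return 0;
}
#endif

static struct tracer example_tracer = {
        .name           = "example",
        .init           = example_tracer_start,
        .reset          = example_tracer_reset,
#ifdef CONFIG_FTRACE_STARTUP_TEST
        .selftest       = example_selftest,
#endif
};

static __init int init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);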
  
  /**
   * register_tracer - register a tracer with the ftrace system.
@@@ -851,57 -1049,9 +1052,9 @@@ int register_tracer(struct tracer *type
        if (!type->wait_pipe)
                type->wait_pipe = default_wait_pipe;
  
- #ifdef CONFIG_FTRACE_STARTUP_TEST
-       if (type->selftest && !tracing_selftest_disabled) {
-               struct tracer *saved_tracer = current_trace;
-               struct trace_array *tr = &global_trace;
-               /*
-                * Run a selftest on this tracer.
-                * Here we reset the trace buffer, and set the current
-                * tracer to be this tracer. The tracer can then run some
-                * internal tracing to verify that everything is in order.
-                * If we fail, we do not register this tracer.
-                */
-               tracing_reset_online_cpus(tr);
-               current_trace = type;
-               if (type->use_max_tr) {
-                       /* If we expanded the buffers, make sure the max is expanded too */
-                       if (ring_buffer_expanded)
-                               ring_buffer_resize(max_tr.buffer, trace_buf_size,
-                                                  RING_BUFFER_ALL_CPUS);
-                       type->allocated_snapshot = true;
-               }
-               /* the test is responsible for initializing and enabling */
-               pr_info("Testing tracer %s: ", type->name);
-               ret = type->selftest(type, tr);
-               /* the test is responsible for resetting too */
-               current_trace = saved_tracer;
-               if (ret) {
-                       printk(KERN_CONT "FAILED!\n");
-                       /* Add the warning after printing 'FAILED' */
-                       WARN_ON(1);
-                       goto out;
-               }
-               /* Only reset on passing, to avoid touching corrupted buffers */
-               tracing_reset_online_cpus(tr);
-               if (type->use_max_tr) {
-                       type->allocated_snapshot = false;
-                       /* Shrink the max buffer again */
-                       if (ring_buffer_expanded)
-                               ring_buffer_resize(max_tr.buffer, 1,
-                                                  RING_BUFFER_ALL_CPUS);
-               }
-               printk(KERN_CONT "PASSED\n");
-       }
- #endif
+       ret = run_tracer_selftest(type);
+       if (ret < 0)
+               goto out;
  
        type->next = trace_types;
        trace_types = type;
        tracing_set_tracer(type->name);
        default_bootup_tracer = NULL;
        /* disable other selftests, since this will break it. */
-       tracing_selftest_disabled = 1;
+       tracing_selftest_disabled = true;
  #ifdef CONFIG_FTRACE_STARTUP_TEST
        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
               type->name);
        return ret;
  }
  
- void tracing_reset(struct trace_array *tr, int cpu)
+ void tracing_reset(struct trace_buffer *buf, int cpu)
  {
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = buf->buffer;
  
        if (!buffer)
                return;
        ring_buffer_record_enable(buffer);
  }
  
- void tracing_reset_online_cpus(struct trace_array *tr)
+ void tracing_reset_online_cpus(struct trace_buffer *buf)
  {
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = buf->buffer;
        int cpu;
  
        if (!buffer)
        /* Make sure all commits have finished */
        synchronize_sched();
  
-       tr->time_start = ftrace_now(tr->cpu);
+       buf->time_start = ftrace_now(buf->cpu);
  
        for_each_online_cpu(cpu)
                ring_buffer_reset_cpu(buffer, cpu);
  
  void tracing_reset_current(int cpu)
  {
-       tracing_reset(&global_trace, cpu);
+       tracing_reset(&global_trace.trace_buffer, cpu);
  }
  
- void tracing_reset_current_online_cpus(void)
+ void tracing_reset_all_online_cpus(void)
  {
-       tracing_reset_online_cpus(&global_trace);
+       struct trace_array *tr;
+       mutex_lock(&trace_types_lock);
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               tracing_reset_online_cpus(&tr->trace_buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+               tracing_reset_online_cpus(&tr->max_buffer);
+ #endif
+       }
+       mutex_unlock(&trace_types_lock);
  }
  
  #define SAVED_CMDLINES 128
@@@ -998,7 -1157,7 +1160,7 @@@ static void trace_init_cmdlines(void
  
  int is_tracing_stopped(void)
  {
-       return trace_stop_count;
+       return global_trace.stop_count;
  }
  
  /**
@@@ -1030,12 -1189,12 +1192,12 @@@ void tracing_start(void
        if (tracing_disabled)
                return;
  
-       raw_spin_lock_irqsave(&tracing_start_lock, flags);
-       if (--trace_stop_count) {
-               if (trace_stop_count < 0) {
+       raw_spin_lock_irqsave(&global_trace.start_lock, flags);
+       if (--global_trace.stop_count) {
+               if (global_trace.stop_count < 0) {
                        /* Someone screwed up their debugging */
                        WARN_ON_ONCE(1);
-                       trace_stop_count = 0;
+                       global_trace.stop_count = 0;
                }
                goto out;
        }
        /* Prevent the buffers from switching */
        arch_spin_lock(&ftrace_max_lock);
  
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
  
-       buffer = max_tr.buffer;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       buffer = global_trace.max_buffer.buffer;
        if (buffer)
                ring_buffer_record_enable(buffer);
+ #endif
  
        arch_spin_unlock(&ftrace_max_lock);
  
        ftrace_start();
   out:
-       raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
+       raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
+ }
+ static void tracing_start_tr(struct trace_array *tr)
+ {
+       struct ring_buffer *buffer;
+       unsigned long flags;
+       if (tracing_disabled)
+               return;
+       /* If global, we need to also start the max tracer */
+       if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+               return tracing_start();
+       raw_spin_lock_irqsave(&tr->start_lock, flags);
+       if (--tr->stop_count) {
+               if (tr->stop_count < 0) {
+                       /* Someone screwed up their debugging */
+                       WARN_ON_ONCE(1);
+                       tr->stop_count = 0;
+               }
+               goto out;
+       }
+       buffer = tr->trace_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_enable(buffer);
+  out:
+       raw_spin_unlock_irqrestore(&tr->start_lock, flags);
  }
  
  /**
@@@ -1070,25 -1262,48 +1265,48 @@@ void tracing_stop(void
        unsigned long flags;
  
        ftrace_stop();
-       raw_spin_lock_irqsave(&tracing_start_lock, flags);
-       if (trace_stop_count++)
+       raw_spin_lock_irqsave(&global_trace.start_lock, flags);
+       if (global_trace.stop_count++)
                goto out;
  
        /* Prevent the buffers from switching */
        arch_spin_lock(&ftrace_max_lock);
  
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
  
-       buffer = max_tr.buffer;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       buffer = global_trace.max_buffer.buffer;
        if (buffer)
                ring_buffer_record_disable(buffer);
+ #endif
  
        arch_spin_unlock(&ftrace_max_lock);
  
   out:
-       raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
+       raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
+ }
+ static void tracing_stop_tr(struct trace_array *tr)
+ {
+       struct ring_buffer *buffer;
+       unsigned long flags;
+       /* If global, we need to also stop the max tracer */
+       if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+               return tracing_stop();
+       raw_spin_lock_irqsave(&tr->start_lock, flags);
+       if (tr->stop_count++)
+               goto out;
+       buffer = tr->trace_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_disable(buffer);
+  out:
+       raw_spin_unlock_irqrestore(&tr->start_lock, flags);
  }
  
  void trace_stop_cmdline_recording(void);
@@@ -1221,11 -1436,6 +1439,6 @@@ voi
  __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
  {
        __this_cpu_write(trace_cmdline_save, true);
-       if (trace_wakeup_needed) {
-               trace_wakeup_needed = false;
-               /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&trace_work_wakeup);
-       }
        ring_buffer_unlock_commit(buffer, event);
  }
  
@@@ -1248,12 -1458,24 +1461,24 @@@ void trace_buffer_unlock_commit(struct 
  }
  EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
  
+ struct ring_buffer_event *
+ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
+                         struct ftrace_event_file *ftrace_file,
+                         int type, unsigned long len,
+                         unsigned long flags, int pc)
+ {
+       *current_rb = ftrace_file->tr->trace_buffer.buffer;
+       return trace_buffer_lock_reserve(*current_rb,
+                                        type, len, flags, pc);
+ }
+ EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
  struct ring_buffer_event *
  trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
                                  int type, unsigned long len,
                                  unsigned long flags, int pc)
  {
-       *current_rb = global_trace.buffer;
+       *current_rb = global_trace.trace_buffer.buffer;
        return trace_buffer_lock_reserve(*current_rb,
                                         type, len, flags, pc);
  }
@@@ -1292,7 -1514,7 +1517,7 @@@ trace_function(struct trace_array *tr
               int pc)
  {
        struct ftrace_event_call *call = &event_function;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
  
@@@ -1433,13 -1655,14 +1658,14 @@@ void ftrace_trace_stack(struct ring_buf
  void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                   int pc)
  {
-       __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
+       __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
  }
  
  /**
   * trace_dump_stack - record a stack back trace in the trace buffer
+  * @skip: Number of functions to skip (helper handlers)
   */
- void trace_dump_stack(void)
+ void trace_dump_stack(int skip)
  {
        unsigned long flags;
  
  
        local_save_flags(flags);
  
-       /* skipping 3 traces, seems to get us at the caller of this function */
-       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
+       /*
+        * Skip 3 more, seems to get us at the caller of
+        * this function.
+        */
+       skip += 3;
+       __ftrace_trace_stack(global_trace.trace_buffer.buffer,
+                            flags, skip, preempt_count(), NULL);
  }
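Editor's note: trace_dump_stack() now takes a skip count, so callers can trim their own helper frames out of the recorded backtrace. A short hedged sketch, assuming the void trace_dump_stack(int skip) declaration lives in <linux/kernel.h> as before; the surrounding driver code is invented:

/* Illustrative only -- not part of the diff. */
#include <linux/kernel.h>

static void example_unexpected_state(void)
{
        trace_dump_stack(0);    /* record a backtrace from the caller on down */
        trace_printk("unexpected state, see stack above\n");
}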
  
  static DEFINE_PER_CPU(int, user_stack_count);
@@@ -1619,7 -1847,7 +1850,7 @@@ void trace_printk_init_buffers(void
         * directly here. If the global_trace.buffer is already
         * allocated here, then this was called by module code.
         */
-       if (global_trace.buffer)
+       if (global_trace.trace_buffer.buffer)
                tracing_start_cmdline_record();
  }
  
@@@ -1679,7 -1907,7 +1910,7 @@@ int trace_vbprintk(unsigned long ip, co
  
        local_save_flags(flags);
        size = sizeof(*entry) + sizeof(u32) * len;
-       buffer = tr->buffer;
+       buffer = tr->trace_buffer.buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
                                          flags, pc);
        if (!event)
  }
  EXPORT_SYMBOL_GPL(trace_vbprintk);
  
- int trace_array_printk(struct trace_array *tr,
-                      unsigned long ip, const char *fmt, ...)
- {
-       int ret;
-       va_list ap;
-       if (!(trace_flags & TRACE_ITER_PRINTK))
-               return 0;
-       va_start(ap, fmt);
-       ret = trace_array_vprintk(tr, ip, fmt, ap);
-       va_end(ap);
-       return ret;
- }
- int trace_array_vprintk(struct trace_array *tr,
-                       unsigned long ip, const char *fmt, va_list args)
+ static int
+ __trace_array_vprintk(struct ring_buffer *buffer,
+                     unsigned long ip, const char *fmt, va_list args)
  {
        struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
-       struct ring_buffer *buffer;
        int len = 0, size, pc;
        struct print_entry *entry;
        unsigned long flags;
  
        local_save_flags(flags);
        size = sizeof(*entry) + len + 1;
-       buffer = tr->buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                                          flags, pc);
        if (!event)
        return len;
  }
  
- int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
- {
+ int trace_array_vprintk(struct trace_array *tr,
+                       unsigned long ip, const char *fmt, va_list args)
+ {
+       return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
+ }
+ int trace_array_printk(struct trace_array *tr,
+                      unsigned long ip, const char *fmt, ...)
+ {
+       int ret;
+       va_list ap;
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
+       va_start(ap, fmt);
+       ret = trace_array_vprintk(tr, ip, fmt, ap);
+       va_end(ap);
+       return ret;
+ }
+ int trace_array_printk_buf(struct ring_buffer *buffer,
+                          unsigned long ip, const char *fmt, ...)
+ {
+       int ret;
+       va_list ap;
+       if (!(trace_flags & TRACE_ITER_PRINTK))
+               return 0;
+       va_start(ap, fmt);
+       ret = __trace_array_vprintk(buffer, ip, fmt, ap);
+       va_end(ap);
+       return ret;
+ }
+ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+ {
        return trace_array_vprintk(&global_trace, ip, fmt, args);
  }
  EXPORT_SYMBOL_GPL(trace_vprintk);
@@@ -1796,7 -2044,7 +2047,7 @@@ peek_next_entry(struct trace_iterator *
        if (buf_iter)
                event = ring_buffer_iter_peek(buf_iter, ts);
        else
-               event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
+               event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
                                         lost_events);
  
        if (event) {
@@@ -1811,7 -2059,7 +2062,7 @@@ static struct trace_entry 
  __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
                  unsigned long *missing_events, u64 *ent_ts)
  {
-       struct ring_buffer *buffer = iter->tr->buffer;
+       struct ring_buffer *buffer = iter->trace_buffer->buffer;
        struct trace_entry *ent, *next = NULL;
        unsigned long lost_events = 0, next_lost = 0;
        int cpu_file = iter->cpu_file;
         * If we are in a per_cpu trace file, don't bother by iterating over
         * all cpu and peek directly.
         */
-       if (cpu_file > TRACE_PIPE_ALL_CPU) {
+       if (cpu_file > RING_BUFFER_ALL_CPUS) {
                if (ring_buffer_empty_cpu(buffer, cpu_file))
                        return NULL;
                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
@@@ -1888,7 -2136,7 +2139,7 @@@ void *trace_find_next_entry_inc(struct 
  
  static void trace_consume(struct trace_iterator *iter)
  {
-       ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
+       ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
                            &iter->lost_events);
  }
  
@@@ -1921,13 -2169,12 +2172,12 @@@ static void *s_next(struct seq_file *m
  
  void tracing_iter_reset(struct trace_iterator *iter, int cpu)
  {
-       struct trace_array *tr = iter->tr;
        struct ring_buffer_event *event;
        struct ring_buffer_iter *buf_iter;
        unsigned long entries = 0;
        u64 ts;
  
-       tr->data[cpu]->skipped_entries = 0;
+       per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
  
        buf_iter = trace_buffer_iter(iter, cpu);
        if (!buf_iter)
         * by the timestamp being before the start of the buffer.
         */
        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
-               if (ts >= iter->tr->time_start)
+               if (ts >= iter->trace_buffer->time_start)
                        break;
                entries++;
                ring_buffer_read(buf_iter, NULL);
        }
  
-       tr->data[cpu]->skipped_entries = entries;
+       per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
  }
  
  /*
  static void *s_start(struct seq_file *m, loff_t *pos)
  {
        struct trace_iterator *iter = m->private;
+       struct trace_array *tr = iter->tr;
        int cpu_file = iter->cpu_file;
        void *p = NULL;
        loff_t l = 0;
         * will point to the same string as current_trace->name.
         */
        mutex_lock(&trace_types_lock);
-       if (unlikely(current_trace && iter->trace->name != current_trace->name))
-               *iter->trace = *current_trace;
+       if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+               *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);
  
+ #ifdef CONFIG_TRACER_MAX_TRACE
        if (iter->snapshot && iter->trace->use_max_tr)
                return ERR_PTR(-EBUSY);
+ #endif
  
        if (!iter->snapshot)
                atomic_inc(&trace_record_cmdline_disabled);
                iter->cpu = 0;
                iter->idx = -1;
  
-               if (cpu_file == TRACE_PIPE_ALL_CPU) {
+               if (cpu_file == RING_BUFFER_ALL_CPUS) {
                        for_each_tracing_cpu(cpu)
                                tracing_iter_reset(iter, cpu);
                } else
@@@ -2016,17 -2266,21 +2269,21 @@@ static void s_stop(struct seq_file *m, 
  {
        struct trace_iterator *iter = m->private;
  
+ #ifdef CONFIG_TRACER_MAX_TRACE
        if (iter->snapshot && iter->trace->use_max_tr)
                return;
+ #endif
  
        if (!iter->snapshot)
                atomic_dec(&trace_record_cmdline_disabled);
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
  }
  
  static void
- get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
+ get_total_entries(struct trace_buffer *buf,
+                 unsigned long *total, unsigned long *entries)
  {
        unsigned long count;
        int cpu;
        *entries = 0;
  
        for_each_tracing_cpu(cpu) {
-               count = ring_buffer_entries_cpu(tr->buffer, cpu);
+               count = ring_buffer_entries_cpu(buf->buffer, cpu);
                /*
                 * If this buffer has skipped entries, then we hold all
                 * entries for the trace and we need to ignore the
                 * ones before the time stamp.
                 */
-               if (tr->data[cpu]->skipped_entries) {
-                       count -= tr->data[cpu]->skipped_entries;
+               if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
+                       count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
                        /* total is the same as the entries */
                        *total += count;
                } else
                        *total += count +
-                               ring_buffer_overrun_cpu(tr->buffer, cpu);
+                               ring_buffer_overrun_cpu(buf->buffer, cpu);
                *entries += count;
        }
  }
@@@ -2064,27 -2318,27 +2321,27 @@@ static void print_lat_help_header(struc
        seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
  }
  
- static void print_event_info(struct trace_array *tr, struct seq_file *m)
+ static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  {
        unsigned long total;
        unsigned long entries;
  
-       get_total_entries(tr, &total, &entries);
+       get_total_entries(buf, &total, &entries);
        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
                   entries, total, num_online_cpus());
        seq_puts(m, "#\n");
  }
  
- static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
+ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
  {
-       print_event_info(tr, m);
+       print_event_info(buf, m);
        seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
        seq_puts(m, "#              | |       |          |         |\n");
  }
  
- static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
+ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
  {
-       print_event_info(tr, m);
+       print_event_info(buf, m);
        seq_puts(m, "#                              _-----=> irqs-off\n");
        seq_puts(m, "#                             / _----=> need-resched\n");
        seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
  print_trace_header(struct seq_file *m, struct trace_iterator *iter)
  {
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-       struct trace_array *tr = iter->tr;
-       struct trace_array_cpu *data = tr->data[tr->cpu];
-       struct tracer *type = current_trace;
+       struct trace_buffer *buf = iter->trace_buffer;
+       struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
+       struct tracer *type = iter->trace;
        unsigned long entries;
        unsigned long total;
        const char *name = "preemption";
  
        name = type->name;
  
-       get_total_entries(tr, &total, &entries);
+       get_total_entries(buf, &total, &entries);
  
        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
                   name, UTS_RELEASE);
                   nsecs_to_usecs(data->saved_latency),
                   entries,
                   total,
-                  tr->cpu,
+                  buf->cpu,
  #if defined(CONFIG_PREEMPT_NONE)
                   "server",
  #elif defined(CONFIG_PREEMPT_VOLUNTARY)
@@@ -2169,7 -2423,7 +2426,7 @@@ static void test_cpu_buff_start(struct 
        if (cpumask_test_cpu(iter->cpu, iter->started))
                return;
  
-       if (iter->tr->data[iter->cpu]->skipped_entries)
+       if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
                return;
  
        cpumask_set_cpu(iter->cpu, iter->started);
@@@ -2292,14 -2546,14 +2549,14 @@@ int trace_empty(struct trace_iterator *
        int cpu;
  
        /* If we are looking at one CPU buffer, only check that one */
-       if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
+       if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
                cpu = iter->cpu_file;
                buf_iter = trace_buffer_iter(iter, cpu);
                if (buf_iter) {
                        if (!ring_buffer_iter_empty(buf_iter))
                                return 0;
                } else {
-                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                       if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
                                return 0;
                }
                return 1;
                        if (!ring_buffer_iter_empty(buf_iter))
                                return 0;
                } else {
-                       if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
+                       if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
                                return 0;
                }
        }
@@@ -2335,6 -2589,11 +2592,11 @@@ enum print_line_t print_trace_line(stru
                        return ret;
        }
  
+       if (iter->ent->type == TRACE_BPUTS &&
+                       trace_flags & TRACE_ITER_PRINTK &&
+                       trace_flags & TRACE_ITER_PRINTK_MSGONLY)
+               return trace_print_bputs_msg_only(iter);
        if (iter->ent->type == TRACE_BPRINT &&
                        trace_flags & TRACE_ITER_PRINTK &&
                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
@@@ -2389,9 -2648,9 +2651,9 @@@ void trace_default_header(struct seq_fi
        } else {
                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
                        if (trace_flags & TRACE_ITER_IRQ_INFO)
-                               print_func_help_header_irq(iter->tr, m);
+                               print_func_help_header_irq(iter->trace_buffer, m);
                        else
-                               print_func_help_header(iter->tr, m);
+                               print_func_help_header(iter->trace_buffer, m);
                }
        }
  }
@@@ -2405,14 -2664,8 +2667,8 @@@ static void test_ftrace_alive(struct se
  }
  
  #ifdef CONFIG_TRACER_MAX_TRACE
- static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+ static void show_snapshot_main_help(struct seq_file *m)
  {
-       if (iter->trace->allocated_snapshot)
-               seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
-       else
-               seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
-       seq_printf(m, "# Snapshot commands:\n");
        seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
        seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
        seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
        seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
        seq_printf(m, "#                       is not a '0' or '1')\n");
  }
+ static void show_snapshot_percpu_help(struct seq_file *m)
+ {
+       seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
+ #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+       seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
+       seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
+ #else
+       seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
+       seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
+ #endif
+       seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
+       seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
+       seq_printf(m, "#                       is not a '0' or '1')\n");
+ }
+ static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
+ {
+       if (iter->tr->allocated_snapshot)
+               seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
+       else
+               seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
+       seq_printf(m, "# Snapshot commands:\n");
+       if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+               show_snapshot_main_help(m);
+       else
+               show_snapshot_percpu_help(m);
+ }
  #else
  /* Should never be called */
  static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
@@@ -2479,7 -2761,8 +2764,8 @@@ static const struct seq_operations trac
  static struct trace_iterator *
  __tracing_open(struct inode *inode, struct file *file, bool snapshot)
  {
-       long cpu_file = (long) inode->i_private;
+       struct trace_cpu *tc = inode->i_private;
+       struct trace_array *tr = tc->tr;
        struct trace_iterator *iter;
        int cpu;
  
        if (!iter->trace)
                goto fail;
  
-       *iter->trace = *current_trace;
+       *iter->trace = *tr->current_trace;
  
        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
                goto fail;
  
-       if (current_trace->print_max || snapshot)
-               iter->tr = &max_tr;
+       iter->tr = tr;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       /* Currently only the top directory has a snapshot */
+       if (tr->current_trace->print_max || snapshot)
+               iter->trace_buffer = &tr->max_buffer;
        else
-               iter->tr = &global_trace;
+ #endif
+               iter->trace_buffer = &tr->trace_buffer;
        iter->snapshot = snapshot;
        iter->pos = -1;
        mutex_init(&iter->mutex);
-       iter->cpu_file = cpu_file;
+       iter->cpu_file = tc->cpu;
  
        /* Notify the tracer early; before we stop tracing. */
        if (iter->trace && iter->trace->open)
                iter->trace->open(iter);
  
        /* Annotate start of buffers if we had overruns */
-       if (ring_buffer_overruns(iter->tr->buffer))
+       if (ring_buffer_overruns(iter->trace_buffer->buffer))
                iter->iter_flags |= TRACE_FILE_ANNOTATE;
  
        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
  
        /* stop the trace while dumping if we are not opening "snapshot" */
        if (!iter->snapshot)
-               tracing_stop();
+               tracing_stop_tr(tr);
  
-       if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
+       if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
                        iter->buffer_iter[cpu] =
-                               ring_buffer_read_prepare(iter->tr->buffer, cpu);
+                               ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
                }
                ring_buffer_read_prepare_sync();
                for_each_tracing_cpu(cpu) {
        } else {
                cpu = iter->cpu_file;
                iter->buffer_iter[cpu] =
-                       ring_buffer_read_prepare(iter->tr->buffer, cpu);
+                       ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
                ring_buffer_read_prepare_sync();
                ring_buffer_read_start(iter->buffer_iter[cpu]);
                tracing_iter_reset(iter, cpu);
        }
  
+       tr->ref++;
        mutex_unlock(&trace_types_lock);
  
        return iter;
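
__tracing_open(), and the other per-CPU file handlers below, now recover both the owning instance and the CPU from inode->i_private, which carries a struct trace_cpu instead of a bare CPU number. A minimal sketch of that private-data plumbing, fields abridged; the per-CPU files created later in this patch hand out &data->trace_cpu as i_private:

/* Abridged sketch of the new debugfs private data. */
struct trace_array;

struct trace_cpu {
        struct trace_array      *tr;    /* owning tracing instance */
        int                      cpu;   /* one CPU, or RING_BUFFER_ALL_CPUS */
};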
@@@ -2579,14 -2869,20 +2872,20 @@@ static int tracing_release(struct inod
  {
        struct seq_file *m = file->private_data;
        struct trace_iterator *iter;
+       struct trace_array *tr;
        int cpu;
  
        if (!(file->f_mode & FMODE_READ))
                return 0;
  
        iter = m->private;
+       tr = iter->tr;
  
        mutex_lock(&trace_types_lock);
+       WARN_ON(!tr->ref);
+       tr->ref--;
        for_each_tracing_cpu(cpu) {
                if (iter->buffer_iter[cpu])
                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
  
        if (!iter->snapshot)
                /* reenable tracing if it was previously enabled */
-               tracing_start();
+               tracing_start_tr(tr);
        mutex_unlock(&trace_types_lock);
  
        mutex_destroy(&iter->mutex);
@@@ -2616,12 -2912,13 +2915,13 @@@ static int tracing_open(struct inode *i
        /* If this file was open for write, then erase contents */
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC)) {
-               long cpu = (long) inode->i_private;
+               struct trace_cpu *tc = inode->i_private;
+               struct trace_array *tr = tc->tr;
  
-               if (cpu == TRACE_PIPE_ALL_CPU)
-                       tracing_reset_online_cpus(&global_trace);
+               if (tc->cpu == RING_BUFFER_ALL_CPUS)
+                       tracing_reset_online_cpus(&tr->trace_buffer);
                else
-                       tracing_reset(&global_trace, cpu);
+                       tracing_reset(&tr->trace_buffer, tc->cpu);
        }
  
        if (file->f_mode & FMODE_READ) {
@@@ -2768,8 -3065,9 +3068,9 @@@ static ssize_
  tracing_cpumask_write(struct file *filp, const char __user *ubuf,
                      size_t count, loff_t *ppos)
  {
-       int err, cpu;
+       struct trace_array *tr = filp->private_data;
        cpumask_var_t tracing_cpumask_new;
+       int err, cpu;
  
        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
                return -ENOMEM;
                 */
                if (cpumask_test_cpu(cpu, tracing_cpumask) &&
                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
-                       atomic_inc(&global_trace.data[cpu]->disabled);
-                       ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
+                       atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
+                       ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
                }
                if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
-                       atomic_dec(&global_trace.data[cpu]->disabled);
-                       ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
+                       atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
+                       ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
                }
        }
        arch_spin_unlock(&ftrace_max_lock);
@@@ -2824,12 -3122,13 +3125,13 @@@ static const struct file_operations tra
  static int tracing_trace_options_show(struct seq_file *m, void *v)
  {
        struct tracer_opt *trace_opts;
+       struct trace_array *tr = m->private;
        u32 tracer_flags;
        int i;
  
        mutex_lock(&trace_types_lock);
-       tracer_flags = current_trace->flags->val;
-       trace_opts = current_trace->flags->opts;
+       tracer_flags = tr->current_trace->flags->val;
+       trace_opts = tr->current_trace->flags->opts;
  
        for (i = 0; trace_options[i]; i++) {
                if (trace_flags & (1 << i))
@@@ -2893,15 -3192,15 +3195,15 @@@ int trace_keep_overwrite(struct tracer 
        return 0;
  }
  
- int set_tracer_flag(unsigned int mask, int enabled)
+ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
  {
        /* do nothing if flag is already set */
        if (!!(trace_flags & mask) == !!enabled)
                return 0;
  
        /* Give the tracer a chance to approve the change */
-       if (current_trace->flag_changed)
-               if (current_trace->flag_changed(current_trace, mask, !!enabled))
+       if (tr->current_trace->flag_changed)
+               if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
                        return -EINVAL;
  
        if (enabled)
                trace_event_enable_cmd_record(enabled);
  
        if (mask == TRACE_ITER_OVERWRITE) {
-               ring_buffer_change_overwrite(global_trace.buffer, enabled);
+               ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
  #ifdef CONFIG_TRACER_MAX_TRACE
-               ring_buffer_change_overwrite(max_tr.buffer, enabled);
+               ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
  #endif
        }
  
        return 0;
  }
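
set_tracer_flag() and trace_set_options() now take the trace_array they operate on, so a flag flip only touches that instance's buffers (including its max_buffer when overwrite mode changes). The signature change, sketched with both forms taken from the hunk above:

/* Signature sketch; the old form is shown as a comment for contrast. */
struct trace_array;

/* was:  int set_tracer_flag(unsigned int mask, int enabled); */
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);

/* a caller now names the instance, e.g. from trace_set_options():
 *      ret = set_tracer_flag(tr, 1 << i, !neg);
 */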
  
- static int trace_set_options(char *option)
+ static int trace_set_options(struct trace_array *tr, char *option)
  {
        char *cmp;
        int neg = 0;
  
        for (i = 0; trace_options[i]; i++) {
                if (strcmp(cmp, trace_options[i]) == 0) {
-                       ret = set_tracer_flag(1 << i, !neg);
+                       ret = set_tracer_flag(tr, 1 << i, !neg);
                        break;
                }
        }
  
        /* If no option could be set, test the specific tracer options */
        if (!trace_options[i])
-               ret = set_tracer_option(current_trace, cmp, neg);
+               ret = set_tracer_option(tr->current_trace, cmp, neg);
  
        mutex_unlock(&trace_types_lock);
  
@@@ -2961,6 -3260,8 +3263,8 @@@ static ssize_
  tracing_trace_options_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
  {
+       struct seq_file *m = filp->private_data;
+       struct trace_array *tr = m->private;
        char buf[64];
        int ret;
  
  
        buf[cnt] = 0;
  
-       ret = trace_set_options(buf);
+       ret = trace_set_options(tr, buf);
        if (ret < 0)
                return ret;
  
@@@ -2985,7 -3286,8 +3289,8 @@@ static int tracing_trace_options_open(s
  {
        if (tracing_disabled)
                return -ENODEV;
-       return single_open(file, tracing_trace_options_show, NULL);
+       return single_open(file, tracing_trace_options_show, inode->i_private);
  }
  
  static const struct file_operations tracing_iter_fops = {
  
  static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
-       "# mount -t debugfs nodev /sys/kernel/debug\n\n"
-       "# cat /sys/kernel/debug/tracing/available_tracers\n"
-       "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
-       "# cat /sys/kernel/debug/tracing/current_tracer\n"
-       "nop\n"
-       "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
-       "# cat /sys/kernel/debug/tracing/current_tracer\n"
-       "wakeup\n"
-       "# cat /sys/kernel/debug/tracing/trace_options\n"
-       "noprint-parent nosym-offset nosym-addr noverbose\n"
-       "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
-       "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
-       "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
-       "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
+       "# echo 0 > tracing_on : quick way to disable tracing\n"
+       "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
+       " Important files:\n"
+       "  trace\t\t\t- The static contents of the buffer\n"
+       "\t\t\t  To clear the buffer write into this file: echo > trace\n"
+       "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
+       "  current_tracer\t- function and latency tracers\n"
+       "  available_tracers\t- list of configured tracers for current_tracer\n"
+       "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
+       "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
+       "  trace_clock\t\t-change the clock used to order events\n"
+       "       local:   Per cpu clock but may not be synced across CPUs\n"
+       "      global:   Synced across CPUs but slows tracing down.\n"
+       "     counter:   Not a clock, but just an increment\n"
+       "      uptime:   Jiffy counter from time of boot\n"
+       "        perf:   Same clock that perf events use\n"
+ #ifdef CONFIG_X86_64
+       "     x86-tsc:   TSC cycle counter\n"
+ #endif
+       "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
+       "  tracing_cpumask\t- Limit which CPUs to trace\n"
+       "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
+       "\t\t\t  Remove sub-buffer with rmdir\n"
+       "  trace_options\t\t- Set format or modify how tracing happens\n"
+       "\t\t\t  Disable an option by adding a suffix 'no' to the option name\n"
+ #ifdef CONFIG_DYNAMIC_FTRACE
+       "\n  available_filter_functions - list of functions that can be filtered on\n"
+       "  set_ftrace_filter\t- echo function name in here to only trace these functions\n"
+       "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
+       "            modules: Can select a group via module\n"
+       "             Format: :mod:<module-name>\n"
+       "             example: echo :mod:ext3 > set_ftrace_filter\n"
+       "            triggers: a command to perform when function is hit\n"
+       "              Format: <function>:<trigger>[:count]\n"
+       "             trigger: traceon, traceoff\n"
+       "                      enable_event:<system>:<event>\n"
+       "                      disable_event:<system>:<event>\n"
+ #ifdef CONFIG_STACKTRACE
+       "                      stacktrace\n"
+ #endif
+ #ifdef CONFIG_TRACER_SNAPSHOT
+       "                      snapshot\n"
+ #endif
+       "             example: echo do_fault:traceoff > set_ftrace_filter\n"
+       "                      echo do_trap:traceoff:3 > set_ftrace_filter\n"
+       "             The first one will disable tracing every time do_fault is hit\n"
+       "             The second will disable tracing at most 3 times when do_trap is hit\n"
+       "               The first time do trap is hit and it disables tracing, the counter\n"
+       "               will decrement to 2. If tracing is already disabled, the counter\n"
+       "               will not decrement. It only decrements when the trigger did work\n"
+       "             To remove trigger without count:\n"
+       "               echo '!<function>:<trigger> > set_ftrace_filter\n"
+       "             To remove trigger with a count:\n"
+       "               echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
+       "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
+       "            accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
+       "            modules: Can select a group via module command :mod:\n"
+       "            Does not accept triggers\n"
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #ifdef CONFIG_FUNCTION_TRACER
+       "  set_ftrace_pid\t- Write pid(s) to only function trace those pids (function)\n"
+ #endif
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
+       "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
+ #endif
+ #ifdef CONFIG_TRACER_SNAPSHOT
+       "\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
+       "\t\t\t  Read the contents for more information\n"
+ #endif
+ #ifdef CONFIG_STACKTRACE
+       "  stack_trace\t\t- Shows the max stack trace when active\n"
+       "  stack_max_size\t- Shows current max stack size that was traced\n"
+       "\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
+ #ifdef CONFIG_DYNAMIC_FTRACE
+       "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
+ #endif
+ #endif /* CONFIG_STACKTRACE */
  ;
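
The rewritten README above is reference text rather than a session transcript. For illustration, a small user-space program that exercises two of the files it documents; the debugfs mount point is an assumption and error handling is deliberately minimal:

/* Hedged usage sketch for files described in the README text above.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[64];
        ssize_t n;
        int fd;

        /* "echo 1 > tracing_on : quick way to re-enable tracing" */
        fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
        if (fd >= 0) {
                write(fd, "1", 1);
                close(fd);
        }

        /* "buffer_total_size_kb - view total size of all cpu buffers" */
        fd = open("/sys/kernel/debug/tracing/buffer_total_size_kb", O_RDONLY);
        if (fd >= 0) {
                n = read(fd, buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        printf("total buffer size (kb): %s", buf);
                }
                close(fd);
        }
        return 0;
}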
  
  static ssize_t
@@@ -3083,11 -3449,12 +3452,12 @@@ static ssize_
  tracing_set_trace_read(struct file *filp, char __user *ubuf,
                       size_t cnt, loff_t *ppos)
  {
+       struct trace_array *tr = filp->private_data;
        char buf[MAX_TRACER_SIZE+2];
        int r;
  
        mutex_lock(&trace_types_lock);
-       r = sprintf(buf, "%s\n", current_trace->name);
+       r = sprintf(buf, "%s\n", tr->current_trace->name);
        mutex_unlock(&trace_types_lock);
  
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  
  int tracer_init(struct tracer *t, struct trace_array *tr)
  {
-       tracing_reset_online_cpus(tr);
+       tracing_reset_online_cpus(&tr->trace_buffer);
        return t->init(tr);
  }
  
- static void set_buffer_entries(struct trace_array *tr, unsigned long val)
+ static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
  {
        int cpu;
        for_each_tracing_cpu(cpu)
-               tr->data[cpu]->entries = val;
+               per_cpu_ptr(buf->data, cpu)->entries = val;
  }
  
+ #ifdef CONFIG_TRACER_MAX_TRACE
  /* resize @tr's buffer to the size of @size_tr's entries */
- static int resize_buffer_duplicate_size(struct trace_array *tr,
-                                       struct trace_array *size_tr, int cpu_id)
+ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
+                                       struct trace_buffer *size_buf, int cpu_id)
  {
        int cpu, ret = 0;
  
        if (cpu_id == RING_BUFFER_ALL_CPUS) {
                for_each_tracing_cpu(cpu) {
-                       ret = ring_buffer_resize(tr->buffer,
-                                       size_tr->data[cpu]->entries, cpu);
+                       ret = ring_buffer_resize(trace_buf->buffer,
+                                per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
                        if (ret < 0)
                                break;
-                       tr->data[cpu]->entries = size_tr->data[cpu]->entries;
+                       per_cpu_ptr(trace_buf->data, cpu)->entries =
+                               per_cpu_ptr(size_buf->data, cpu)->entries;
                }
        } else {
-               ret = ring_buffer_resize(tr->buffer,
-                                       size_tr->data[cpu_id]->entries, cpu_id);
+               ret = ring_buffer_resize(trace_buf->buffer,
+                                per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
                if (ret == 0)
-                       tr->data[cpu_id]->entries =
-                               size_tr->data[cpu_id]->entries;
+                       per_cpu_ptr(trace_buf->data, cpu_id)->entries =
+                               per_cpu_ptr(size_buf->data, cpu_id)->entries;
        }
  
        return ret;
  }
+ #endif /* CONFIG_TRACER_MAX_TRACE */
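
Both helpers above switch from indexing a flat per-CPU array (tr->data[cpu]) to a percpu allocation reached with per_cpu_ptr(). A stripped-down kernel-style fragment of that accessor pattern, assuming the usual percpu headers; field and function names here are illustrative only:

/* Kernel-style sketch of the accessor change, not a complete driver. */
#include <linux/percpu.h>

struct trace_array_cpu_sketch {
        unsigned long entries;
};

static void set_entries_sketch(struct trace_array_cpu_sketch __percpu *data,
                               int cpu, unsigned long val)
{
        /* was: tr->data[cpu]->entries = val; */
        per_cpu_ptr(data, cpu)->entries = val;
}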
  
- static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
+ static int __tracing_resize_ring_buffer(struct trace_array *tr,
+                                       unsigned long size, int cpu)
  {
        int ret;
  
         * we use the size that was given, and we can forget about
         * expanding it later.
         */
-       ring_buffer_expanded = 1;
+       ring_buffer_expanded = true;
  
        /* May be called before buffers are initialized */
-       if (!global_trace.buffer)
+       if (!tr->trace_buffer.buffer)
                return 0;
  
-       ret = ring_buffer_resize(global_trace.buffer, size, cpu);
+       ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
        if (ret < 0)
                return ret;
  
-       if (!current_trace->use_max_tr)
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
+           !tr->current_trace->use_max_tr)
                goto out;
  
-       ret = ring_buffer_resize(max_tr.buffer, size, cpu);
+       ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
        if (ret < 0) {
-               int r = resize_buffer_duplicate_size(&global_trace,
-                                                    &global_trace, cpu);
+               int r = resize_buffer_duplicate_size(&tr->trace_buffer,
+                                                    &tr->trace_buffer, cpu);
                if (r < 0) {
                        /*
                         * AARGH! We are left with different
        }
  
        if (cpu == RING_BUFFER_ALL_CPUS)
-               set_buffer_entries(&max_tr, size);
+               set_buffer_entries(&tr->max_buffer, size);
        else
-               max_tr.data[cpu]->entries = size;
+               per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
  
   out:
+ #endif /* CONFIG_TRACER_MAX_TRACE */
        if (cpu == RING_BUFFER_ALL_CPUS)
-               set_buffer_entries(&global_trace, size);
+               set_buffer_entries(&tr->trace_buffer, size);
        else
-               global_trace.data[cpu]->entries = size;
+               per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
  
        return ret;
  }
  
- static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
+ static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
+                                         unsigned long size, int cpu_id)
  {
        int ret = size;
  
                }
        }
  
-       ret = __tracing_resize_ring_buffer(size, cpu_id);
+       ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
        if (ret < 0)
                ret = -ENOMEM;
  
@@@ -3233,7 -3610,7 +3613,7 @@@ int tracing_update_buffers(void
  
        mutex_lock(&trace_types_lock);
        if (!ring_buffer_expanded)
-               ret = __tracing_resize_ring_buffer(trace_buf_size,
+               ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
                                                RING_BUFFER_ALL_CPUS);
        mutex_unlock(&trace_types_lock);
  
  struct trace_option_dentry;
  
  static struct trace_option_dentry *
- create_trace_option_files(struct tracer *tracer);
+ create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
  
  static void
  destroy_trace_option_files(struct trace_option_dentry *topts);
@@@ -3253,13 -3630,15 +3633,15 @@@ static int tracing_set_tracer(const cha
        static struct trace_option_dentry *topts;
        struct trace_array *tr = &global_trace;
        struct tracer *t;
+ #ifdef CONFIG_TRACER_MAX_TRACE
        bool had_max_tr;
+ #endif
        int ret = 0;
  
        mutex_lock(&trace_types_lock);
  
        if (!ring_buffer_expanded) {
-               ret = __tracing_resize_ring_buffer(trace_buf_size,
+               ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
                                                RING_BUFFER_ALL_CPUS);
                if (ret < 0)
                        goto out;
                ret = -EINVAL;
                goto out;
        }
-       if (t == current_trace)
+       if (t == tr->current_trace)
                goto out;
  
        trace_branch_disable();
  
-       current_trace->enabled = false;
+       tr->current_trace->enabled = false;
  
-       if (current_trace->reset)
-               current_trace->reset(tr);
+       if (tr->current_trace->reset)
+               tr->current_trace->reset(tr);
  
-       had_max_tr = current_trace->allocated_snapshot;
-       current_trace = &nop_trace;
+       /* Current trace needs to be nop_trace before synchronize_sched */
+       tr->current_trace = &nop_trace;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       had_max_tr = tr->allocated_snapshot;
  
        if (had_max_tr && !t->use_max_tr) {
                /*
                 * so a synchronized_sched() is sufficient.
                 */
                synchronize_sched();
-               /*
-                * We don't free the ring buffer. instead, resize it because
-                * The max_tr ring buffer has some state (e.g. ring->clock) and
-                * we want preserve it.
-                */
-               ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
-               set_buffer_entries(&max_tr, 1);
-               tracing_reset_online_cpus(&max_tr);
-               current_trace->allocated_snapshot = false;
+               free_snapshot(tr);
        }
+ #endif
        destroy_trace_option_files(topts);
  
-       topts = create_trace_option_files(t);
+       topts = create_trace_option_files(tr, t);
+ #ifdef CONFIG_TRACER_MAX_TRACE
        if (t->use_max_tr && !had_max_tr) {
-               /* we need to make per cpu buffer sizes equivalent */
-               ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
-                                                  RING_BUFFER_ALL_CPUS);
+               ret = alloc_snapshot(tr);
                if (ret < 0)
                        goto out;
-               t->allocated_snapshot = true;
        }
+ #endif
  
        if (t->init) {
                ret = tracer_init(t, tr);
                        goto out;
        }
  
-       current_trace = t;
-       current_trace->enabled = true;
+       tr->current_trace = t;
+       tr->current_trace->enabled = true;
        trace_branch_enable(tr);
   out:
        mutex_unlock(&trace_types_lock);
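
tracing_set_tracer() now delegates the snapshot buffer lifecycle to alloc_snapshot()/free_snapshot() helpers instead of open-coding the resize. Judging from the removed lines, the free path is expected to look roughly like the sketch below; it is inferred from this hunk, not the helper's literal body:

/* Inferred sketch of free_snapshot(): the max buffer object is kept
 * (so its clock state survives) and merely shrunk to a minimal size. */
static void free_snapshot_sketch(struct trace_array *tr)
{
        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
        set_buffer_entries(&tr->max_buffer, 1);
        tracing_reset_online_cpus(&tr->max_buffer);
        tr->allocated_snapshot = false;
}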
@@@ -3399,7 -3774,8 +3777,8 @@@ tracing_max_lat_write(struct file *filp
  
  static int tracing_open_pipe(struct inode *inode, struct file *filp)
  {
-       long cpu_file = (long) inode->i_private;
+       struct trace_cpu *tc = inode->i_private;
+       struct trace_array *tr = tc->tr;
        struct trace_iterator *iter;
        int ret = 0;
  
                ret = -ENOMEM;
                goto fail;
        }
-       *iter->trace = *current_trace;
+       *iter->trace = *tr->current_trace;
  
        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
                ret = -ENOMEM;
        if (trace_clocks[trace_clock_id].in_ns)
                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
  
-       iter->cpu_file = cpu_file;
-       iter->tr = &global_trace;
+       iter->cpu_file = tc->cpu;
+       iter->tr = tc->tr;
+       iter->trace_buffer = &tc->tr->trace_buffer;
        mutex_init(&iter->mutex);
        filp->private_data = iter;
  
@@@ -3481,24 -3858,28 +3861,28 @@@ static int tracing_release_pipe(struct 
  }
  
  static unsigned int
- tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
  {
-       struct trace_iterator *iter = filp->private_data;
+       /* Iterators are static, they should be filled or empty */
+       if (trace_buffer_iter(iter, iter->cpu_file))
+               return POLLIN | POLLRDNORM;
  
-       if (trace_flags & TRACE_ITER_BLOCK) {
+       if (trace_flags & TRACE_ITER_BLOCK)
                /*
                 * Always select as readable when in blocking mode
                 */
                return POLLIN | POLLRDNORM;
-       } else {
-               if (!trace_empty(iter))
-                       return POLLIN | POLLRDNORM;
-               poll_wait(filp, &trace_wait, poll_table);
-               if (!trace_empty(iter))
-                       return POLLIN | POLLRDNORM;
+       else
+               return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
+                                            filp, poll_table);
+ }
  
-               return 0;
-       }
+ static unsigned int
+ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+ {
+       struct trace_iterator *iter = filp->private_data;
+       return trace_poll(iter, filp, poll_table);
  }
  
  /*
@@@ -3564,6 -3945,7 +3948,7 @@@ tracing_read_pipe(struct file *filp, ch
                  size_t cnt, loff_t *ppos)
  {
        struct trace_iterator *iter = filp->private_data;
+       struct trace_array *tr = iter->tr;
        ssize_t sret;
  
        /* return any leftover data */
  
        /* copy the tracer to avoid using a global lock all around */
        mutex_lock(&trace_types_lock);
-       if (unlikely(iter->trace->name != current_trace->name))
-               *iter->trace = *current_trace;
+       if (unlikely(iter->trace->name != tr->current_trace->name))
+               *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);
  
        /*
@@@ -3732,6 -4114,7 +4117,7 @@@ static ssize_t tracing_splice_read_pipe
                .ops            = &tracing_pipe_buf_ops,
                .spd_release    = tracing_spd_release_pipe,
        };
+       struct trace_array *tr = iter->tr;
        ssize_t ret;
        size_t rem;
        unsigned int i;
  
        /* copy the tracer to avoid using a global lock all around */
        mutex_lock(&trace_types_lock);
-       if (unlikely(iter->trace->name != current_trace->name))
-               *iter->trace = *current_trace;
+       if (unlikely(iter->trace->name != tr->current_trace->name))
+               *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);
  
        mutex_lock(&iter->mutex);
@@@ -3804,43 -4187,19 +4190,19 @@@ out_err
        goto out;
  }
  
- struct ftrace_entries_info {
-       struct trace_array      *tr;
-       int                     cpu;
- };
- static int tracing_entries_open(struct inode *inode, struct file *filp)
- {
-       struct ftrace_entries_info *info;
-       if (tracing_disabled)
-               return -ENODEV;
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
-       info->tr = &global_trace;
-       info->cpu = (unsigned long)inode->i_private;
-       filp->private_data = info;
-       return 0;
- }
  static ssize_t
  tracing_entries_read(struct file *filp, char __user *ubuf,
                     size_t cnt, loff_t *ppos)
  {
-       struct ftrace_entries_info *info = filp->private_data;
-       struct trace_array *tr = info->tr;
+       struct trace_cpu *tc = filp->private_data;
+       struct trace_array *tr = tc->tr;
        char buf[64];
        int r = 0;
        ssize_t ret;
  
        mutex_lock(&trace_types_lock);
  
-       if (info->cpu == RING_BUFFER_ALL_CPUS) {
+       if (tc->cpu == RING_BUFFER_ALL_CPUS) {
                int cpu, buf_size_same;
                unsigned long size;
  
                for_each_tracing_cpu(cpu) {
                        /* fill in the size from first enabled cpu */
                        if (size == 0)
-                               size = tr->data[cpu]->entries;
-                       if (size != tr->data[cpu]->entries) {
+                               size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
+                       if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
                                buf_size_same = 0;
                                break;
                        }
                } else
                        r = sprintf(buf, "X\n");
        } else
-               r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
+               r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
  
        mutex_unlock(&trace_types_lock);
  
@@@ -3879,7 -4238,7 +4241,7 @@@ static ssize_
  tracing_entries_write(struct file *filp, const char __user *ubuf,
                      size_t cnt, loff_t *ppos)
  {
-       struct ftrace_entries_info *info = filp->private_data;
+       struct trace_cpu *tc = filp->private_data;
        unsigned long val;
        int ret;
  
        /* value is in KB */
        val <<= 10;
  
-       ret = tracing_resize_ring_buffer(val, info->cpu);
+       ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
        if (ret < 0)
                return ret;
  
        return cnt;
  }
  
- static int
- tracing_entries_release(struct inode *inode, struct file *filp)
- {
-       struct ftrace_entries_info *info = filp->private_data;
-       kfree(info);
-       return 0;
- }
  static ssize_t
  tracing_total_entries_read(struct file *filp, char __user *ubuf,
                                size_t cnt, loff_t *ppos)
  
        mutex_lock(&trace_types_lock);
        for_each_tracing_cpu(cpu) {
-               size += tr->data[cpu]->entries >> 10;
+               size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
                if (!ring_buffer_expanded)
                        expanded_size += trace_buf_size >> 10;
        }
@@@ -3954,11 -4303,13 +4306,13 @@@ tracing_free_buffer_write(struct file *
  static int
  tracing_free_buffer_release(struct inode *inode, struct file *filp)
  {
+       struct trace_array *tr = inode->i_private;
        /* disable tracing ? */
        if (trace_flags & TRACE_ITER_STOP_ON_FREE)
                tracing_off();
        /* resize the ring buffer to 0 */
-       tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
+       tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
  
        return 0;
  }
@@@ -4027,7 -4378,7 +4381,7 @@@ tracing_mark_write(struct file *filp, c
  
        local_save_flags(irq_flags);
        size = sizeof(*entry) + cnt + 2; /* possible \n added */
-       buffer = global_trace.buffer;
+       buffer = global_trace.trace_buffer.buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                                          irq_flags, preempt_count());
        if (!event) {
  
  static int tracing_clock_show(struct seq_file *m, void *v)
  {
+       struct trace_array *tr = m->private;
        int i;
  
        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
                seq_printf(m,
                        "%s%s%s%s", i ? " " : "",
-                       i == trace_clock_id ? "[" : "", trace_clocks[i].name,
-                       i == trace_clock_id ? "]" : "");
+                       i == tr->clock_id ? "[" : "", trace_clocks[i].name,
+                       i == tr->clock_id ? "]" : "");
        seq_putc(m, '\n');
  
        return 0;
  static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
                                   size_t cnt, loff_t *fpos)
  {
+       struct seq_file *m = filp->private_data;
+       struct trace_array *tr = m->private;
        char buf[64];
        const char *clockstr;
        int i;
        if (i == ARRAY_SIZE(trace_clocks))
                return -EINVAL;
  
-       trace_clock_id = i;
        mutex_lock(&trace_types_lock);
  
-       ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
-       if (max_tr.buffer)
-               ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
+       tr->clock_id = i;
+       ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
  
        /*
         * New clock may not be consistent with the previous clock.
         * Reset the buffer so that it doesn't have incomparable timestamps.
         */
-       tracing_reset_online_cpus(&global_trace);
-       tracing_reset_online_cpus(&max_tr);
+       tracing_reset_online_cpus(&global_trace.trace_buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
+               ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
+       tracing_reset_online_cpus(&global_trace.max_buffer);
+ #endif
  
        mutex_unlock(&trace_types_lock);
  
@@@ -4131,20 -4488,45 +4491,45 @@@ static int tracing_clock_open(struct in
  {
        if (tracing_disabled)
                return -ENODEV;
-       return single_open(file, tracing_clock_show, NULL);
+       return single_open(file, tracing_clock_show, inode->i_private);
  }
  
+ struct ftrace_buffer_info {
+       struct trace_iterator   iter;
+       void                    *spare;
+       unsigned int            read;
+ };
  #ifdef CONFIG_TRACER_SNAPSHOT
  static int tracing_snapshot_open(struct inode *inode, struct file *file)
  {
+       struct trace_cpu *tc = inode->i_private;
        struct trace_iterator *iter;
+       struct seq_file *m;
        int ret = 0;
  
        if (file->f_mode & FMODE_READ) {
                iter = __tracing_open(inode, file, true);
                if (IS_ERR(iter))
                        ret = PTR_ERR(iter);
+       } else {
+               /* Writes still need the seq_file to hold the private data */
+               m = kzalloc(sizeof(*m), GFP_KERNEL);
+               if (!m)
+                       return -ENOMEM;
+               iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+               if (!iter) {
+                       kfree(m);
+                       return -ENOMEM;
+               }
+               iter->tr = tc->tr;
+               iter->trace_buffer = &tc->tr->max_buffer;
+               iter->cpu_file = tc->cpu;
+               m->private = iter;
+               file->private_data = m;
        }
        return ret;
  }
  
@@@ -4152,6 -4534,9 +4537,9 @@@ static ssize_
  tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
  {
+       struct seq_file *m = filp->private_data;
+       struct trace_iterator *iter = m->private;
+       struct trace_array *tr = iter->tr;
        unsigned long val;
        int ret;
  
  
        mutex_lock(&trace_types_lock);
  
-       if (current_trace->use_max_tr) {
+       if (tr->current_trace->use_max_tr) {
                ret = -EBUSY;
                goto out;
        }
  
        switch (val) {
        case 0:
-               if (current_trace->allocated_snapshot) {
-                       /* free spare buffer */
-                       ring_buffer_resize(max_tr.buffer, 1,
-                                          RING_BUFFER_ALL_CPUS);
-                       set_buffer_entries(&max_tr, 1);
-                       tracing_reset_online_cpus(&max_tr);
-                       current_trace->allocated_snapshot = false;
+               if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+                       ret = -EINVAL;
+                       break;
                }
+               if (tr->allocated_snapshot)
+                       free_snapshot(tr);
                break;
        case 1:
-               if (!current_trace->allocated_snapshot) {
-                       /* allocate spare buffer */
-                       ret = resize_buffer_duplicate_size(&max_tr,
-                                       &global_trace, RING_BUFFER_ALL_CPUS);
+ /* Only allow per-cpu swap if the ring buffer supports it */
+ #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
+               if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+                       ret = -EINVAL;
+                       break;
+               }
+ #endif
+               if (!tr->allocated_snapshot) {
+                       ret = alloc_snapshot(tr);
                        if (ret < 0)
                                break;
-                       current_trace->allocated_snapshot = true;
                }
                local_irq_disable();
                /* Now, we're going to swap */
-               update_max_tr(&global_trace, current, smp_processor_id());
+               if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+                       update_max_tr(tr, current, smp_processor_id());
+               else
+                       update_max_tr_single(tr, current, iter->cpu_file);
                local_irq_enable();
                break;
        default:
-               if (current_trace->allocated_snapshot)
-                       tracing_reset_online_cpus(&max_tr);
+               if (tr->allocated_snapshot) {
+                       if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+                               tracing_reset_online_cpus(&tr->max_buffer);
+                       else
+                               tracing_reset(&tr->max_buffer, iter->cpu_file);
+               }
                break;
        }
  
        mutex_unlock(&trace_types_lock);
        return ret;
  }
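
With the per-CPU snapshot files added later in this patch, the same write handler serves both the top-level and the per_cpu/cpuN snapshot files: '0' is rejected on per-CPU files, and '1' only swaps a single CPU's buffer when the ring buffer was built with CONFIG_RING_BUFFER_ALLOW_SWAP. A hedged user-space sketch; the path and CPU number are illustrative:

/* Snapshot one CPU's buffer through the new per-cpu snapshot file. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/sys/kernel/debug/tracing/per_cpu/cpu0/snapshot";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open");      /* older kernel or tracing unavailable */
                return 1;
        }
        if (write(fd, "1", 1) < 0)   /* snapshot CPU 0's buffer */
                perror("write");     /* e.g. EINVAL without ALLOW_SWAP */
        close(fd);
        return 0;
}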
+ static int tracing_snapshot_release(struct inode *inode, struct file *file)
+ {
+       struct seq_file *m = file->private_data;
+       if (file->f_mode & FMODE_READ)
+               return tracing_release(inode, file);
+       /* If write only, the seq_file is just a stub */
+       if (m)
+               kfree(m->private);
+       kfree(m);
+       return 0;
+ }
+ static int tracing_buffers_open(struct inode *inode, struct file *filp);
+ static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
+                                   size_t count, loff_t *ppos);
+ static int tracing_buffers_release(struct inode *inode, struct file *file);
+ static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+                  struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+ static int snapshot_raw_open(struct inode *inode, struct file *filp)
+ {
+       struct ftrace_buffer_info *info;
+       int ret;
+       ret = tracing_buffers_open(inode, filp);
+       if (ret < 0)
+               return ret;
+       info = filp->private_data;
+       if (info->iter.trace->use_max_tr) {
+               tracing_buffers_release(inode, filp);
+               return -EBUSY;
+       }
+       info->iter.snapshot = true;
+       info->iter.trace_buffer = &info->iter.tr->max_buffer;
+       return ret;
+ }
  #endif /* CONFIG_TRACER_SNAPSHOT */
  
  
@@@ -4237,10 -4675,9 +4678,9 @@@ static const struct file_operations tra
  };
  
  static const struct file_operations tracing_entries_fops = {
-       .open           = tracing_entries_open,
+       .open           = tracing_open_generic,
        .read           = tracing_entries_read,
        .write          = tracing_entries_write,
-       .release        = tracing_entries_release,
        .llseek         = generic_file_llseek,
  };
  
@@@ -4275,20 -4712,23 +4715,23 @@@ static const struct file_operations sna
        .read           = seq_read,
        .write          = tracing_snapshot_write,
        .llseek         = tracing_seek,
-       .release        = tracing_release,
+       .release        = tracing_snapshot_release,
  };
- #endif /* CONFIG_TRACER_SNAPSHOT */
  
- struct ftrace_buffer_info {
-       struct trace_array      *tr;
-       void                    *spare;
-       int                     cpu;
-       unsigned int            read;
+ static const struct file_operations snapshot_raw_fops = {
+       .open           = snapshot_raw_open,
+       .read           = tracing_buffers_read,
+       .release        = tracing_buffers_release,
+       .splice_read    = tracing_buffers_splice_read,
+       .llseek         = no_llseek,
  };
  
+ #endif /* CONFIG_TRACER_SNAPSHOT */
  static int tracing_buffers_open(struct inode *inode, struct file *filp)
  {
-       int cpu = (int)(long)inode->i_private;
+       struct trace_cpu *tc = inode->i_private;
+       struct trace_array *tr = tc->tr;
        struct ftrace_buffer_info *info;
  
        if (tracing_disabled)
        if (!info)
                return -ENOMEM;
  
-       info->tr        = &global_trace;
-       info->cpu       = cpu;
-       info->spare     = NULL;
+       mutex_lock(&trace_types_lock);
+       tr->ref++;
+       info->iter.tr           = tr;
+       info->iter.cpu_file     = tc->cpu;
+       info->iter.trace        = tr->current_trace;
+       info->iter.trace_buffer = &tr->trace_buffer;
+       info->spare             = NULL;
        /* Force reading ring buffer for first read */
-       info->read      = (unsigned int)-1;
+       info->read              = (unsigned int)-1;
  
        filp->private_data = info;
  
+       mutex_unlock(&trace_types_lock);
        return nonseekable_open(inode, filp);
  }
  
+ static unsigned int
+ tracing_buffers_poll(struct file *filp, poll_table *poll_table)
+ {
+       struct ftrace_buffer_info *info = filp->private_data;
+       struct trace_iterator *iter = &info->iter;
+       return trace_poll(iter, filp, poll_table);
+ }
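
With the .poll handler added above, a reader of the raw per-CPU buffer file can block in poll() instead of spinning on reads. A hedged user-space sketch; the path and CPU number are illustrative:

/* Wait up to five seconds for raw ring-buffer data on CPU 0. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct pollfd pfd;

        pfd.fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY | O_NONBLOCK);
        if (pfd.fd < 0) {
                perror("open");
                return 1;
        }
        pfd.events = POLLIN;

        if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN)) {
                char page[4096];
                ssize_t n = read(pfd.fd, page, sizeof(page));
                printf("read %zd bytes of raw ring-buffer data\n", n);
        }
        close(pfd.fd);
        return 0;
}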
  static ssize_t
  tracing_buffers_read(struct file *filp, char __user *ubuf,
                     size_t count, loff_t *ppos)
  {
        struct ftrace_buffer_info *info = filp->private_data;
+       struct trace_iterator *iter = &info->iter;
        ssize_t ret;
-       size_t size;
+       ssize_t size;
  
        if (!count)
                return 0;
  
+       mutex_lock(&trace_types_lock);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
+               size = -EBUSY;
+               goto out_unlock;
+       }
+ #endif
        if (!info->spare)
-               info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
+               info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
+                                                         iter->cpu_file);
+       size = -ENOMEM;
        if (!info->spare)
-               return -ENOMEM;
+               goto out_unlock;
  
        /* Do we have previous read data to read? */
        if (info->read < PAGE_SIZE)
                goto read;
  
-       trace_access_lock(info->cpu);
-       ret = ring_buffer_read_page(info->tr->buffer,
+  again:
+       trace_access_lock(iter->cpu_file);
+       ret = ring_buffer_read_page(iter->trace_buffer->buffer,
                                    &info->spare,
                                    count,
-                                   info->cpu, 0);
-       trace_access_unlock(info->cpu);
-       if (ret < 0)
-               return 0;
+                                   iter->cpu_file, 0);
+       trace_access_unlock(iter->cpu_file);
  
-       info->read = 0;
+       if (ret < 0) {
+               if (trace_empty(iter)) {
+                       if ((filp->f_flags & O_NONBLOCK)) {
+                               size = -EAGAIN;
+                               goto out_unlock;
+                       }
+                       mutex_unlock(&trace_types_lock);
+                       iter->trace->wait_pipe(iter);
+                       mutex_lock(&trace_types_lock);
+                       if (signal_pending(current)) {
+                               size = -EINTR;
+                               goto out_unlock;
+                       }
+                       goto again;
+               }
+               size = 0;
+               goto out_unlock;
+       }
  
- read:
+       info->read = 0;
+  read:
        size = PAGE_SIZE - info->read;
        if (size > count)
                size = count;
  
        ret = copy_to_user(ubuf, info->spare + info->read, size);
-       if (ret == size)
-               return -EFAULT;
+       if (ret == size) {
+               size = -EFAULT;
+               goto out_unlock;
+       }
        size -= ret;
  
        *ppos += size;
        info->read += size;
  
+  out_unlock:
+       mutex_unlock(&trace_types_lock);
        return size;
  }
  
  static int tracing_buffers_release(struct inode *inode, struct file *file)
  {
        struct ftrace_buffer_info *info = file->private_data;
+       struct trace_iterator *iter = &info->iter;
+       mutex_lock(&trace_types_lock);
+       WARN_ON(!iter->tr->ref);
+       iter->tr->ref--;
  
        if (info->spare)
-               ring_buffer_free_read_page(info->tr->buffer, info->spare);
+               ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
        kfree(info);
  
+       mutex_unlock(&trace_types_lock);
        return 0;
  }
  
@@@ -4428,6 -4927,7 +4930,7 @@@ tracing_buffers_splice_read(struct fil
                            unsigned int flags)
  {
        struct ftrace_buffer_info *info = file->private_data;
+       struct trace_iterator *iter = &info->iter;
        struct partial_page partial_def[PIPE_DEF_BUFFERS];
        struct page *pages_def[PIPE_DEF_BUFFERS];
        struct splice_pipe_desc spd = {
        };
        struct buffer_ref *ref;
        int entries, size, i;
-       size_t ret;
+       ssize_t ret;
  
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
+       mutex_lock(&trace_types_lock);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
+               ret = -EBUSY;
+               goto out;
+       }
+ #endif
+       if (splice_grow_spd(pipe, &spd)) {
+               ret = -ENOMEM;
+               goto out;
+       }
  
        if (*ppos & (PAGE_SIZE - 1)) {
                ret = -EINVAL;
                len &= PAGE_MASK;
        }
  
-       trace_access_lock(info->cpu);
-       entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+  again:
+       trace_access_lock(iter->cpu_file);
+       entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
  
        for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                        break;
  
                ref->ref = 1;
-               ref->buffer = info->tr->buffer;
-               ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
+               ref->buffer = iter->trace_buffer->buffer;
+               ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
                if (!ref->page) {
                        kfree(ref);
                        break;
                }
  
                r = ring_buffer_read_page(ref->buffer, &ref->page,
-                                         len, info->cpu, 1);
+                                         len, iter->cpu_file, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer, ref->page);
                        kfree(ref);
                spd.nr_pages++;
                *ppos += PAGE_SIZE;
  
-               entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+               entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
        }
  
-       trace_access_unlock(info->cpu);
+       trace_access_unlock(iter->cpu_file);
        spd.nr_pages = i;
  
        /* did we read anything? */
        if (!spd.nr_pages) {
-               if (flags & SPLICE_F_NONBLOCK)
+               if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
                        ret = -EAGAIN;
-               else
-                       ret = 0;
-               /* TODO: block */
-               goto out;
+                       goto out;
+               }
+               mutex_unlock(&trace_types_lock);
+               iter->trace->wait_pipe(iter);
+               mutex_lock(&trace_types_lock);
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       goto out;
+               }
+               goto again;
        }
  
        ret = splice_to_pipe(pipe, &spd);
        splice_shrink_spd(&spd);
  out:
+       mutex_unlock(&trace_types_lock);
        return ret;
  }
  
  static const struct file_operations tracing_buffers_fops = {
        .open           = tracing_buffers_open,
        .read           = tracing_buffers_read,
+       .poll           = tracing_buffers_poll,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
        .llseek         = no_llseek,
@@@ -4536,12 -5057,14 +5060,14 @@@ static ssize_
  tracing_stats_read(struct file *filp, char __user *ubuf,
                   size_t count, loff_t *ppos)
  {
-       unsigned long cpu = (unsigned long)filp->private_data;
-       struct trace_array *tr = &global_trace;
+       struct trace_cpu *tc = filp->private_data;
+       struct trace_array *tr = tc->tr;
+       struct trace_buffer *trace_buf = &tr->trace_buffer;
        struct trace_seq *s;
        unsigned long cnt;
        unsigned long long t;
        unsigned long usec_rem;
+       int cpu = tc->cpu;
  
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
  
        trace_seq_init(s);
  
-       cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "entries: %ld\n", cnt);
  
-       cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "overrun: %ld\n", cnt);
  
-       cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
  
-       cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "bytes: %ld\n", cnt);
  
        if (trace_clocks[trace_clock_id].in_ns) {
                /* local or global for trace_clock */
-               t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+               t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
                                                                t, usec_rem);
  
-               t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+               t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
        } else {
                /* counter or tsc mode for trace_clock */
                trace_seq_printf(s, "oldest event ts: %llu\n",
-                               ring_buffer_oldest_event_ts(tr->buffer, cpu));
+                               ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
  
                trace_seq_printf(s, "now ts: %llu\n",
-                               ring_buffer_time_stamp(tr->buffer, cpu));
+                               ring_buffer_time_stamp(trace_buf->buffer, cpu));
        }
  
-       cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "dropped events: %ld\n", cnt);
  
-       cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+       cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
        trace_seq_printf(s, "read events: %ld\n", cnt);
  
        count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
@@@ -4635,60 -5158,161 +5161,161 @@@ static const struct file_operations tra
        .read           = tracing_read_dyn_info,
        .llseek         = generic_file_llseek,
  };
- #endif
+ #endif /* CONFIG_DYNAMIC_FTRACE */
  
- static struct dentry *d_tracer;
+ #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
+ static void
+ ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
+ {
+       tracing_snapshot();
+ }
  
- struct dentry *tracing_init_dentry(void)
+ static void
+ ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
  {
-       static int once;
+       unsigned long *count = (long *)data;
+       if (!*count)
+               return;
  
-       if (d_tracer)
-               return d_tracer;
+       if (*count != -1)
+               (*count)--;
+       tracing_snapshot();
+ }
+ static int
+ ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
+                     struct ftrace_probe_ops *ops, void *data)
+ {
+       long count = (long)data;
+       seq_printf(m, "%ps:", (void *)ip);
+       seq_printf(m, "snapshot");
+       if (count == -1)
+               seq_printf(m, ":unlimited\n");
+       else
+               seq_printf(m, ":count=%ld\n", count);
+       return 0;
+ }
+ static struct ftrace_probe_ops snapshot_probe_ops = {
+       .func                   = ftrace_snapshot,
+       .print                  = ftrace_snapshot_print,
+ };
+ static struct ftrace_probe_ops snapshot_count_probe_ops = {
+       .func                   = ftrace_count_snapshot,
+       .print                  = ftrace_snapshot_print,
+ };
+ static int
+ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
+                              char *glob, char *cmd, char *param, int enable)
+ {
+       struct ftrace_probe_ops *ops;
+       void *count = (void *)-1;
+       char *number;
+       int ret;
+       /* hash funcs only work with set_ftrace_filter */
+       if (!enable)
+               return -EINVAL;
+       ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
+       if (glob[0] == '!') {
+               unregister_ftrace_function_probe_func(glob+1, ops);
+               return 0;
+       }
+       if (!param)
+               goto out_reg;
+       number = strsep(&param, ":");
+       if (!strlen(number))
+               goto out_reg;
+       /*
+        * We use the callback data field (which is a pointer)
+        * as our counter.
+        */
+       ret = kstrtoul(number, 0, (unsigned long *)&count);
+       if (ret)
+               return ret;
+  out_reg:
+       ret = register_ftrace_function_probe(glob, ops, count);
+       if (ret >= 0)
+               alloc_snapshot(&global_trace);
+       return ret < 0 ? ret : 0;
+ }
+ static struct ftrace_func_command ftrace_snapshot_cmd = {
+       .name                   = "snapshot",
+       .func                   = ftrace_trace_snapshot_callback,
+ };
+ static int register_snapshot_cmd(void)
+ {
+       return register_ftrace_command(&ftrace_snapshot_cmd);
+ }
+ #else
+ static inline int register_snapshot_cmd(void) { return 0; }
+ #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
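
The block above wires a new "snapshot" trigger into set_ftrace_filter (when both the snapshot and dynamic-ftrace configs are enabled) and pre-allocates the snapshot buffer when the probe is registered. A hedged user-space sketch of arming it; do_fault is only the example function used in the README above and must be something listed in available_filter_functions:

/* Arm the new "snapshot" function trigger, firing at most three times. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *cmd = "do_fault:snapshot:3\n";
        int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, cmd, strlen(cmd)) < 0)
                perror("write");     /* e.g. unknown function name */
        close(fd);
        return 0;
}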
+ struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
+ {
+       if (tr->dir)
+               return tr->dir;
  
        if (!debugfs_initialized())
                return NULL;
  
-       d_tracer = debugfs_create_dir("tracing", NULL);
+       if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+               tr->dir = debugfs_create_dir("tracing", NULL);
  
-       if (!d_tracer && !once) {
-               once = 1;
-               pr_warning("Could not create debugfs directory 'tracing'\n");
-               return NULL;
-       }
+       if (!tr->dir)
+               pr_warn_once("Could not create debugfs directory 'tracing'\n");
  
-       return d_tracer;
+       return tr->dir;
  }
  
- static struct dentry *d_percpu;
+ struct dentry *tracing_init_dentry(void)
+ {
+       return tracing_init_dentry_tr(&global_trace);
+ }
  
- static struct dentry *tracing_dentry_percpu(void)
+ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
  {
-       static int once;
        struct dentry *d_tracer;
  
-       if (d_percpu)
-               return d_percpu;
-       d_tracer = tracing_init_dentry();
+       if (tr->percpu_dir)
+               return tr->percpu_dir;
  
+       d_tracer = tracing_init_dentry_tr(tr);
        if (!d_tracer)
                return NULL;
  
-       d_percpu = debugfs_create_dir("per_cpu", d_tracer);
+       tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
  
-       if (!d_percpu && !once) {
-               once = 1;
-               pr_warning("Could not create debugfs directory 'per_cpu'\n");
-               return NULL;
-       }
+       WARN_ONCE(!tr->percpu_dir,
+                 "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
  
-       return d_percpu;
+       return tr->percpu_dir;
  }
  
- static void tracing_init_debugfs_percpu(long cpu)
+ static void
+ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
  {
-       struct dentry *d_percpu = tracing_dentry_percpu();
+       struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
+       struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
        struct dentry *d_cpu;
        char cpu_dir[30]; /* 30 characters should be more than enough */
  
  
        /* per cpu trace_pipe */
        trace_create_file("trace_pipe", 0444, d_cpu,
-                       (void *) cpu, &tracing_pipe_fops);
+                       (void *)&data->trace_cpu, &tracing_pipe_fops);
  
        /* per cpu trace */
        trace_create_file("trace", 0644, d_cpu,
-                       (void *) cpu, &tracing_fops);
+                       (void *)&data->trace_cpu, &tracing_fops);
  
        trace_create_file("trace_pipe_raw", 0444, d_cpu,
-                       (void *) cpu, &tracing_buffers_fops);
+                       (void *)&data->trace_cpu, &tracing_buffers_fops);
  
        trace_create_file("stats", 0444, d_cpu,
-                       (void *) cpu, &tracing_stats_fops);
+                       (void *)&data->trace_cpu, &tracing_stats_fops);
  
        trace_create_file("buffer_size_kb", 0444, d_cpu,
-                       (void *) cpu, &tracing_entries_fops);
+                       (void *)&data->trace_cpu, &tracing_entries_fops);
+ #ifdef CONFIG_TRACER_SNAPSHOT
+       trace_create_file("snapshot", 0644, d_cpu,
+                         (void *)&data->trace_cpu, &snapshot_fops);
+       trace_create_file("snapshot_raw", 0444, d_cpu,
+                       (void *)&data->trace_cpu, &snapshot_raw_fops);
+ #endif
  }
  
  #ifdef CONFIG_FTRACE_SELFTEST
  struct trace_option_dentry {
        struct tracer_opt               *opt;
        struct tracer_flags             *flags;
+       struct trace_array              *tr;
        struct dentry                   *entry;
  };
  
@@@ -4763,7 -5396,7 +5399,7 @@@ trace_options_write(struct file *filp, 
  
        if (!!(topt->flags->val & topt->opt->bit) != val) {
                mutex_lock(&trace_types_lock);
-               ret = __set_tracer_option(current_trace, topt->flags,
+               ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
                                          topt->opt, !val);
                mutex_unlock(&trace_types_lock);
                if (ret)
@@@ -4802,6 -5435,7 +5438,7 @@@ static ssize_
  trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
                         loff_t *ppos)
  {
+       struct trace_array *tr = &global_trace;
        long index = (long)filp->private_data;
        unsigned long val;
        int ret;
                return -EINVAL;
  
        mutex_lock(&trace_types_lock);
-       ret = set_tracer_flag(1 << index, val);
+       ret = set_tracer_flag(tr, 1 << index, val);
        mutex_unlock(&trace_types_lock);
  
        if (ret < 0)
@@@ -4848,40 -5482,41 +5485,41 @@@ struct dentry *trace_create_file(const 
  }
  
  
- static struct dentry *trace_options_init_dentry(void)
+ static struct dentry *trace_options_init_dentry(struct trace_array *tr)
  {
        struct dentry *d_tracer;
-       static struct dentry *t_options;
  
-       if (t_options)
-               return t_options;
+       if (tr->options)
+               return tr->options;
  
-       d_tracer = tracing_init_dentry();
+       d_tracer = tracing_init_dentry_tr(tr);
        if (!d_tracer)
                return NULL;
  
-       t_options = debugfs_create_dir("options", d_tracer);
-       if (!t_options) {
+       tr->options = debugfs_create_dir("options", d_tracer);
+       if (!tr->options) {
                pr_warning("Could not create debugfs directory 'options'\n");
                return NULL;
        }
  
-       return t_options;
+       return tr->options;
  }
  
  static void
- create_trace_option_file(struct trace_option_dentry *topt,
+ create_trace_option_file(struct trace_array *tr,
+                        struct trace_option_dentry *topt,
                         struct tracer_flags *flags,
                         struct tracer_opt *opt)
  {
        struct dentry *t_options;
  
-       t_options = trace_options_init_dentry();
+       t_options = trace_options_init_dentry(tr);
        if (!t_options)
                return;
  
        topt->flags = flags;
        topt->opt = opt;
+       topt->tr = tr;
  
        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
                                    &trace_options_fops);
  }
  
  static struct trace_option_dentry *
- create_trace_option_files(struct tracer *tracer)
+ create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
  {
        struct trace_option_dentry *topts;
        struct tracer_flags *flags;
                return NULL;
  
        for (cnt = 0; opts[cnt].name; cnt++)
-               create_trace_option_file(&topts[cnt], flags,
+               create_trace_option_file(tr, &topts[cnt], flags,
                                         &opts[cnt]);
  
        return topts;
@@@ -4937,11 -5572,12 +5575,12 @@@ destroy_trace_option_files(struct trace
  }
  
  static struct dentry *
- create_trace_option_core_file(const char *option, long index)
+ create_trace_option_core_file(struct trace_array *tr,
+                             const char *option, long index)
  {
        struct dentry *t_options;
  
-       t_options = trace_options_init_dentry();
+       t_options = trace_options_init_dentry(tr);
        if (!t_options)
                return NULL;
  
                                    &trace_options_core_fops);
  }
  
- static __init void create_trace_options_dir(void)
+ static __init void create_trace_options_dir(struct trace_array *tr)
  {
        struct dentry *t_options;
        int i;
  
-       t_options = trace_options_init_dentry();
+       t_options = trace_options_init_dentry(tr);
        if (!t_options)
                return;
  
        for (i = 0; trace_options[i]; i++)
-               create_trace_option_core_file(trace_options[i], i);
+               create_trace_option_core_file(tr, trace_options[i], i);
  }
  
  static ssize_t
@@@ -4967,7 -5603,7 +5606,7 @@@ rb_simple_read(struct file *filp, char 
               size_t cnt, loff_t *ppos)
  {
        struct trace_array *tr = filp->private_data;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        char buf[64];
        int r;
  
@@@ -4986,7 -5622,7 +5625,7 @@@ rb_simple_write(struct file *filp, cons
                size_t cnt, loff_t *ppos)
  {
        struct trace_array *tr = filp->private_data;
-       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer *buffer = tr->trace_buffer.buffer;
        unsigned long val;
        int ret;
  
                mutex_lock(&trace_types_lock);
                if (val) {
                        ring_buffer_record_on(buffer);
-                       if (current_trace->start)
-                               current_trace->start(tr);
+                       if (tr->current_trace->start)
+                               tr->current_trace->start(tr);
                } else {
                        ring_buffer_record_off(buffer);
-                       if (current_trace->stop)
-                               current_trace->stop(tr);
+                       if (tr->current_trace->stop)
+                               tr->current_trace->stop(tr);
                }
                mutex_unlock(&trace_types_lock);
        }
@@@ -5020,23 -5656,310 +5659,310 @@@ static const struct file_operations rb_
        .llseek         = default_llseek,
  };
  
+ struct dentry *trace_instance_dir;
+ static void
+ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
+ static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
+ {
+       int cpu;
+       for_each_tracing_cpu(cpu) {
+               memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
+               per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
+               per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
+       }
+ }
+ static int
+ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
+ {
+       enum ring_buffer_flags rb_flags;
+       rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+       buf->buffer = ring_buffer_alloc(size, rb_flags);
+       if (!buf->buffer)
+               return -ENOMEM;
+       buf->data = alloc_percpu(struct trace_array_cpu);
+       if (!buf->data) {
+               ring_buffer_free(buf->buffer);
+               return -ENOMEM;
+       }
+       init_trace_buffers(tr, buf);
+       /* Allocate the first page for all buffers */
+       set_buffer_entries(&tr->trace_buffer,
+                          ring_buffer_size(tr->trace_buffer.buffer, 0));
+       return 0;
+ }
+ static int allocate_trace_buffers(struct trace_array *tr, int size)
+ {
+       int ret;
+       ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
+       if (ret)
+               return ret;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       ret = allocate_trace_buffer(tr, &tr->max_buffer,
+                                   allocate_snapshot ? size : 1);
+       if (WARN_ON(ret)) {
+               ring_buffer_free(tr->trace_buffer.buffer);
+               free_percpu(tr->trace_buffer.data);
+               return -ENOMEM;
+       }
+       tr->allocated_snapshot = allocate_snapshot;
+       /*
+        * Only the top level trace array gets its snapshot allocated
+        * from the kernel command line.
+        */
+       allocate_snapshot = false;
+ #endif
+       return 0;
+ }
+ static int new_instance_create(const char *name)
+ {
+       struct trace_array *tr;
+       int ret;
+       mutex_lock(&trace_types_lock);
+       ret = -EEXIST;
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               if (tr->name && strcmp(tr->name, name) == 0)
+                       goto out_unlock;
+       }
+       ret = -ENOMEM;
+       tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+       if (!tr)
+               goto out_unlock;
+       tr->name = kstrdup(name, GFP_KERNEL);
+       if (!tr->name)
+               goto out_free_tr;
+       raw_spin_lock_init(&tr->start_lock);
+       tr->current_trace = &nop_trace;
+       INIT_LIST_HEAD(&tr->systems);
+       INIT_LIST_HEAD(&tr->events);
+       if (allocate_trace_buffers(tr, trace_buf_size) < 0)
+               goto out_free_tr;
+       /* Holder for file callbacks */
+       tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
+       tr->trace_cpu.tr = tr;
+       tr->dir = debugfs_create_dir(name, trace_instance_dir);
+       if (!tr->dir)
+               goto out_free_tr;
+       ret = event_trace_add_tracer(tr->dir, tr);
+       if (ret)
+               goto out_free_tr;
+       init_tracer_debugfs(tr, tr->dir);
+       list_add(&tr->list, &ftrace_trace_arrays);
+       mutex_unlock(&trace_types_lock);
+       return 0;
+  out_free_tr:
+       if (tr->trace_buffer.buffer)
+               ring_buffer_free(tr->trace_buffer.buffer);
+       kfree(tr->name);
+       kfree(tr);
+  out_unlock:
+       mutex_unlock(&trace_types_lock);
+       return ret;
+ }
+ static int instance_delete(const char *name)
+ {
+       struct trace_array *tr;
+       int found = 0;
+       int ret;
+       mutex_lock(&trace_types_lock);
+       ret = -ENODEV;
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               if (tr->name && strcmp(tr->name, name) == 0) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found)
+               goto out_unlock;
+       ret = -EBUSY;
+       if (tr->ref)
+               goto out_unlock;
+       list_del(&tr->list);
+       event_trace_del_tracer(tr);
+       debugfs_remove_recursive(tr->dir);
+       free_percpu(tr->trace_buffer.data);
+       ring_buffer_free(tr->trace_buffer.buffer);
+       kfree(tr->name);
+       kfree(tr);
+       ret = 0;
+  out_unlock:
+       mutex_unlock(&trace_types_lock);
+       return ret;
+ }
+ static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
+ {
+       struct dentry *parent;
+       int ret;
+       /* Paranoid: Make sure the parent is the "instances" directory */
+       parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+       if (WARN_ON_ONCE(parent != trace_instance_dir))
+               return -ENOENT;
+       /*
+        * The inode mutex is locked, but debugfs_create_dir() will also
+        * take the mutex. As the instances directory can not be destroyed
+        * or changed in any other way, it is safe to unlock it, and
+        * let the dentry try. If two users try to make the same dir at
+        * the same time, then the new_instance_create() will determine the
+        * winner.
+        */
+       mutex_unlock(&inode->i_mutex);
+       ret = new_instance_create(dentry->d_iname);
+       mutex_lock(&inode->i_mutex);
+       return ret;
+ }
+ static int instance_rmdir(struct inode *inode, struct dentry *dentry)
+ {
+       struct dentry *parent;
+       int ret;
+       /* Paranoid: Make sure the parent is the "instances" directory */
+       parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+       if (WARN_ON_ONCE(parent != trace_instance_dir))
+               return -ENOENT;
+       /* The caller did a dget() on dentry */
+       mutex_unlock(&dentry->d_inode->i_mutex);
+       /*
+        * The inode mutex is locked, but debugfs_create_dir() will also
+        * take the mutex. As the instances directory can not be destroyed
+        * or changed in any other way, it is safe to unlock it, and
+        * let the dentry try. If two users try to make the same dir at
+        * the same time, then the instance_delete() will determine the
+        * winner.
+        */
+       mutex_unlock(&inode->i_mutex);
+       ret = instance_delete(dentry->d_iname);
+       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock(&dentry->d_inode->i_mutex);
+       return ret;
+ }
+ static const struct inode_operations instance_dir_inode_operations = {
+       .lookup         = simple_lookup,
+       .mkdir          = instance_mkdir,
+       .rmdir          = instance_rmdir,
+ };
+ static __init void create_trace_instances(struct dentry *d_tracer)
+ {
+       trace_instance_dir = debugfs_create_dir("instances", d_tracer);
+       if (WARN_ON(!trace_instance_dir))
+               return;
+       /* Hijack the dir inode operations, to allow mkdir */
+       trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
+ }
+ static void
+ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
+ {
+       int cpu;
+       trace_create_file("trace_options", 0644, d_tracer,
+                         tr, &tracing_iter_fops);
+       trace_create_file("trace", 0644, d_tracer,
+                       (void *)&tr->trace_cpu, &tracing_fops);
+       trace_create_file("trace_pipe", 0444, d_tracer,
+                       (void *)&tr->trace_cpu, &tracing_pipe_fops);
+       trace_create_file("buffer_size_kb", 0644, d_tracer,
+                       (void *)&tr->trace_cpu, &tracing_entries_fops);
+       trace_create_file("buffer_total_size_kb", 0444, d_tracer,
+                         tr, &tracing_total_entries_fops);
+       trace_create_file("free_buffer", 0644, d_tracer,
+                         tr, &tracing_free_buffer_fops);
+       trace_create_file("trace_marker", 0220, d_tracer,
+                         tr, &tracing_mark_fops);
+       trace_create_file("trace_clock", 0644, d_tracer, tr,
+                         &trace_clock_fops);
+       trace_create_file("tracing_on", 0644, d_tracer,
+                           tr, &rb_simple_fops);
+ #ifdef CONFIG_TRACER_SNAPSHOT
+       trace_create_file("snapshot", 0644, d_tracer,
+                         (void *)&tr->trace_cpu, &snapshot_fops);
+ #endif
+       for_each_tracing_cpu(cpu)
+               tracing_init_debugfs_percpu(tr, cpu);
+ }
  static __init int tracer_init_debugfs(void)
  {
        struct dentry *d_tracer;
-       int cpu;
  
        trace_access_lock_init();
  
        d_tracer = tracing_init_dentry();
+       if (!d_tracer)
+               return 0;
  
-       trace_create_file("trace_options", 0644, d_tracer,
-                       NULL, &tracing_iter_fops);
+       init_tracer_debugfs(&global_trace, d_tracer);
  
        trace_create_file("tracing_cpumask", 0644, d_tracer,
-                       NULL, &tracing_cpumask_fops);
-       trace_create_file("trace", 0644, d_tracer,
-                       (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
+                       &global_trace, &tracing_cpumask_fops);
  
        trace_create_file("available_tracers", 0444, d_tracer,
                        &global_trace, &show_traces_fops);
        trace_create_file("README", 0444, d_tracer,
                        NULL, &tracing_readme_fops);
  
-       trace_create_file("trace_pipe", 0444, d_tracer,
-                       (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-       trace_create_file("buffer_size_kb", 0644, d_tracer,
-                       (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
-       trace_create_file("buffer_total_size_kb", 0444, d_tracer,
-                       &global_trace, &tracing_total_entries_fops);
-       trace_create_file("free_buffer", 0644, d_tracer,
-                       &global_trace, &tracing_free_buffer_fops);
-       trace_create_file("trace_marker", 0220, d_tracer,
-                       NULL, &tracing_mark_fops);
        trace_create_file("saved_cmdlines", 0444, d_tracer,
                        NULL, &tracing_saved_cmdlines_fops);
  
-       trace_create_file("trace_clock", 0644, d_tracer, NULL,
-                         &trace_clock_fops);
-       trace_create_file("tracing_on", 0644, d_tracer,
-                           &global_trace, &rb_simple_fops);
  #ifdef CONFIG_DYNAMIC_FTRACE
        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
  #endif
  
- #ifdef CONFIG_TRACER_SNAPSHOT
-       trace_create_file("snapshot", 0644, d_tracer,
-                         (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
- #endif
-       create_trace_options_dir();
+       create_trace_instances(d_tracer);
  
-       for_each_tracing_cpu(cpu)
-               tracing_init_debugfs_percpu(cpu);
+       create_trace_options_dir(&global_trace);
  
        return 0;
  }
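The instances directory created above accepts mkdir/rmdir to spin up and tear down additional trace arrays, each with its own ring buffer and debugfs files. A brief sketch (the instance name "foo" and the mount point are assumptions):

    # create a second trace array with its own buffer and event files
    mkdir /sys/kernel/debug/tracing/instances/foo
    ls /sys/kernel/debug/tracing/instances/foo
    # expect trace, trace_pipe, trace_options, tracing_on, buffer_size_kb,
    # trace_marker, trace_clock, per_cpu/ and friends, as created by
    # init_tracer_debugfs() and event_trace_add_tracer()
    rmdir /sys/kernel/debug/tracing/instances/foo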
@@@ -5148,8 -6044,8 +6047,8 @@@ voi
  trace_printk_seq(struct trace_seq *s)
  {
        /* Probably should print a warning here. */
-       if (s->len >= 1000)
-               s->len = 1000;
+       if (s->len >= TRACE_MAX_PRINT)
+               s->len = TRACE_MAX_PRINT;
  
        /* should be zero ended, but we are paranoid. */
        s->buffer[s->len] = 0;
  void trace_init_global_iter(struct trace_iterator *iter)
  {
        iter->tr = &global_trace;
-       iter->trace = current_trace;
-       iter->cpu_file = TRACE_PIPE_ALL_CPU;
+       iter->trace = iter->tr->current_trace;
+       iter->cpu_file = RING_BUFFER_ALL_CPUS;
+       iter->trace_buffer = &global_trace.trace_buffer;
  }
  
- static void
- __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
+ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
  {
-       static arch_spinlock_t ftrace_dump_lock =
-               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
+       static atomic_t dump_running;
        unsigned int old_userobj;
-       static int dump_ran;
        unsigned long flags;
        int cnt = 0, cpu;
  
-       /* only one dump */
-       local_irq_save(flags);
-       arch_spin_lock(&ftrace_dump_lock);
-       if (dump_ran)
-               goto out;
-       dump_ran = 1;
+       /* Only allow one dump user at a time. */
+       if (atomic_inc_return(&dump_running) != 1) {
+               atomic_dec(&dump_running);
+               return;
+       }
  
+       /*
+        * Always turn off tracing when we dump.
+        * We don't need to show trace output of what happens
+        * between multiple crashes.
+        *
+        * If the user does a sysrq-z, then they can re-enable
+        * tracing with echo 1 > tracing_on.
+        */
        tracing_off();
  
-       /* Did function tracer already get disabled? */
-       if (ftrace_is_dead()) {
-               printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
-               printk("#          MAY BE MISSING FUNCTION EVENTS\n");
-       }
-       if (disable_tracing)
-               ftrace_kill();
+       local_irq_save(flags);
  
        /* Simulate the iterator */
        trace_init_global_iter(&iter);
  
        for_each_tracing_cpu(cpu) {
-               atomic_inc(&iter.tr->data[cpu]->disabled);
+               atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
        }
  
        old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
  
        switch (oops_dump_mode) {
        case DUMP_ALL:
-               iter.cpu_file = TRACE_PIPE_ALL_CPU;
+               iter.cpu_file = RING_BUFFER_ALL_CPUS;
                break;
        case DUMP_ORIG:
                iter.cpu_file = raw_smp_processor_id();
                goto out_enable;
        default:
                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
-               iter.cpu_file = TRACE_PIPE_ALL_CPU;
+               iter.cpu_file = RING_BUFFER_ALL_CPUS;
        }
  
        printk(KERN_TRACE "Dumping ftrace buffer:\n");
  
+       /* Did function tracer already get disabled? */
+       if (ftrace_is_dead()) {
+               printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+               printk("#          MAY BE MISSING FUNCTION EVENTS\n");
+       }
        /*
         * We need to stop all tracing on all CPUS to read the
         * the next buffer. This is a bit expensive, but is
                printk(KERN_TRACE "---------------------------------\n");
  
   out_enable:
-       /* Re-enable tracing if requested */
-       if (!disable_tracing) {
-               trace_flags |= old_userobj;
+       trace_flags |= old_userobj;
  
-               for_each_tracing_cpu(cpu) {
-                       atomic_dec(&iter.tr->data[cpu]->disabled);
-               }
-               tracing_on();
+       for_each_tracing_cpu(cpu) {
+               atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
        }
-  out:
-       arch_spin_unlock(&ftrace_dump_lock);
+       atomic_dec(&dump_running);
        local_irq_restore(flags);
  }
- /* By default: disable tracing after the dump */
- void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
- {
-       __ftrace_dump(true, oops_dump_mode);
- }
  EXPORT_SYMBOL_GPL(ftrace_dump);
  
  __init static int tracer_alloc_buffers(void)
  {
        int ring_buf_size;
-       enum ring_buffer_flags rb_flags;
-       int i;
        int ret = -ENOMEM;
  
  
        else
                ring_buf_size = 1;
  
-       rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
        cpumask_copy(tracing_cpumask, cpu_all_mask);
  
+       raw_spin_lock_init(&global_trace.start_lock);
        /* TODO: make the number of buffers hot pluggable with CPUS */
-       global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
-       if (!global_trace.buffer) {
+       if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
                WARN_ON(1);
                goto out_free_cpumask;
        }
        if (global_trace.buffer_disabled)
                tracing_off();
  
- #ifdef CONFIG_TRACER_MAX_TRACE
-       max_tr.buffer = ring_buffer_alloc(1, rb_flags);
-       if (!max_tr.buffer) {
-               printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
-               WARN_ON(1);
-               ring_buffer_free(global_trace.buffer);
-               goto out_free_cpumask;
-       }
- #endif
-       /* Allocate the first page for all buffers */
-       for_each_tracing_cpu(i) {
-               global_trace.data[i] = &per_cpu(global_trace_cpu, i);
-               max_tr.data[i] = &per_cpu(max_tr_data, i);
-       }
-       set_buffer_entries(&global_trace,
-                          ring_buffer_size(global_trace.buffer, 0));
- #ifdef CONFIG_TRACER_MAX_TRACE
-       set_buffer_entries(&max_tr, 1);
- #endif
        trace_init_cmdlines();
-       init_irq_work(&trace_work_wakeup, trace_wake_up);
  
        register_tracer(&nop_trace);
  
+       global_trace.current_trace = &nop_trace;
        /* All seems OK, enable tracing */
        tracing_disabled = 0;
  
  
        register_die_notifier(&trace_die_notifier);
  
+       global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
+       /* Holder for file callbacks */
+       global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
+       global_trace.trace_cpu.tr = &global_trace;
+       INIT_LIST_HEAD(&global_trace.systems);
+       INIT_LIST_HEAD(&global_trace.events);
+       list_add(&global_trace.list, &ftrace_trace_arrays);
        while (trace_boot_options) {
                char *option;
  
                option = strsep(&trace_boot_options, ",");
-               trace_set_options(option);
+               trace_set_options(&global_trace, option);
        }
  
+       register_snapshot_cmd();
        return 0;
  
  out_free_cpumask:
+       free_percpu(global_trace.trace_buffer.data);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+       free_percpu(global_trace.max_buffer.data);
+ #endif
        free_cpumask_var(tracing_cpumask);
  out_free_buffer_mask:
        free_cpumask_var(tracing_buffer_mask);
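ftrace_dump() now serializes concurrent dumpers with an atomic counter and always leaves tracing off afterwards. A quick sketch of exercising this from userspace, matching the re-enable hint in the comment above (sysrq path assumed; requires magic-sysrq support):

    # dump the ftrace ring buffer to the console via sysrq-z
    echo z > /proc/sysrq-trigger
    # tracing stays off after the dump; turn it back on explicitly
    echo 1 > /sys/kernel/debug/tracing/tracing_on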
diff --combined kernel/trace/trace_output.c
index 697e88d139076acc16eff22796efcc2e91b412ed,f475b2a7ac888794489a4b3ef58f2b4466f8dc3e..bb922d9ee51ba7b51171db284f6cd20576b012b2
@@@ -14,7 -14,7 +14,7 @@@
  /* must be a power of 2 */
  #define EVENT_HASHSIZE        128
  
- DECLARE_RWSEM(trace_event_mutex);
+ DECLARE_RWSEM(trace_event_sem);
  
  static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
  
@@@ -37,6 -37,22 +37,22 @@@ int trace_print_seq(struct seq_file *m
        return ret;
  }
  
+ enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
+ {
+       struct trace_seq *s = &iter->seq;
+       struct trace_entry *entry = iter->ent;
+       struct bputs_entry *field;
+       int ret;
+       trace_assign_type(field, entry);
+       ret = trace_seq_puts(s, field->str);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+       return TRACE_TYPE_HANDLED;
+ }
  enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
  {
        struct trace_seq *s = &iter->seq;
@@@ -397,6 -413,32 +413,32 @@@ ftrace_print_hex_seq(struct trace_seq *
  }
  EXPORT_SYMBOL(ftrace_print_hex_seq);
  
+ int ftrace_raw_output_prep(struct trace_iterator *iter,
+                          struct trace_event *trace_event)
+ {
+       struct ftrace_event_call *event;
+       struct trace_seq *s = &iter->seq;
+       struct trace_seq *p = &iter->tmp_seq;
+       struct trace_entry *entry;
+       int ret;
+       event = container_of(trace_event, struct ftrace_event_call, event);
+       entry = iter->ent;
+       if (entry->type != event->event.type) {
+               WARN_ON_ONCE(1);
+               return TRACE_TYPE_UNHANDLED;
+       }
+       trace_seq_init(p);
+       ret = trace_seq_printf(s, "%s: ", event->name);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+       return 0;
+ }
+ EXPORT_SYMBOL(ftrace_raw_output_prep);
  #ifdef CONFIG_KRETPROBES
  static inline const char *kretprobed(const char *name)
  {
@@@ -617,7 -659,7 +659,7 @@@ lat_print_timestamp(struct trace_iterat
  {
        unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
        unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
-       unsigned long long abs_ts = iter->ts - iter->tr->time_start;
+       unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start;
        unsigned long long rel_ts = next_ts - iter->ts;
        struct trace_seq *s = &iter->seq;
  
@@@ -739,11 -781,12 +781,11 @@@ static int task_state_char(unsigned lon
  struct trace_event *ftrace_find_event(int type)
  {
        struct trace_event *event;
 -      struct hlist_node *n;
        unsigned key;
  
        key = type & (EVENT_HASHSIZE - 1);
  
 -      hlist_for_each_entry(event, n, &event_hash[key], node) {
 +      hlist_for_each_entry(event, &event_hash[key], node) {
                if (event->type == type)
                        return event;
        }
@@@ -783,12 -826,12 +825,12 @@@ static int trace_search_list(struct lis
  
  void trace_event_read_lock(void)
  {
-       down_read(&trace_event_mutex);
+       down_read(&trace_event_sem);
  }
  
  void trace_event_read_unlock(void)
  {
-       up_read(&trace_event_mutex);
+       up_read(&trace_event_sem);
  }
  
  /**
@@@ -811,7 -854,7 +853,7 @@@ int register_ftrace_event(struct trace_
        unsigned key;
        int ret = 0;
  
-       down_write(&trace_event_mutex);
+       down_write(&trace_event_sem);
  
        if (WARN_ON(!event))
                goto out;
  
        ret = event->type;
   out:
-       up_write(&trace_event_mutex);
+       up_write(&trace_event_sem);
  
        return ret;
  }
  EXPORT_SYMBOL_GPL(register_ftrace_event);
  
  /*
-  * Used by module code with the trace_event_mutex held for write.
+  * Used by module code with the trace_event_sem held for write.
   */
  int __unregister_ftrace_event(struct trace_event *event)
  {
   */
  int unregister_ftrace_event(struct trace_event *event)
  {
-       down_write(&trace_event_mutex);
+       down_write(&trace_event_sem);
        __unregister_ftrace_event(event);
-       up_write(&trace_event_mutex);
+       up_write(&trace_event_sem);
  
        return 0;
  }
@@@ -1217,6 -1260,64 +1259,64 @@@ static struct trace_event trace_user_st
        .funcs          = &trace_user_stack_funcs,
  };
  
+ /* TRACE_BPUTS */
+ static enum print_line_t
+ trace_bputs_print(struct trace_iterator *iter, int flags,
+                  struct trace_event *event)
+ {
+       struct trace_entry *entry = iter->ent;
+       struct trace_seq *s = &iter->seq;
+       struct bputs_entry *field;
+       trace_assign_type(field, entry);
+       if (!seq_print_ip_sym(s, field->ip, flags))
+               goto partial;
+       if (!trace_seq_puts(s, ": "))
+               goto partial;
+       if (!trace_seq_puts(s, field->str))
+               goto partial;
+       return TRACE_TYPE_HANDLED;
+  partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+ }
+ static enum print_line_t
+ trace_bputs_raw(struct trace_iterator *iter, int flags,
+               struct trace_event *event)
+ {
+       struct bputs_entry *field;
+       struct trace_seq *s = &iter->seq;
+       trace_assign_type(field, iter->ent);
+       if (!trace_seq_printf(s, ": %lx : ", field->ip))
+               goto partial;
+       if (!trace_seq_puts(s, field->str))
+               goto partial;
+       return TRACE_TYPE_HANDLED;
+  partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+ }
+ static struct trace_event_functions trace_bputs_funcs = {
+       .trace          = trace_bputs_print,
+       .raw            = trace_bputs_raw,
+ };
+ static struct trace_event trace_bputs_event = {
+       .type           = TRACE_BPUTS,
+       .funcs          = &trace_bputs_funcs,
+ };
  /* TRACE_BPRINT */
  static enum print_line_t
  trace_bprint_print(struct trace_iterator *iter, int flags,
@@@ -1329,6 -1430,7 +1429,7 @@@ static struct trace_event *events[] __i
        &trace_wake_event,
        &trace_stack_event,
        &trace_user_stack_event,
+       &trace_bputs_event,
        &trace_bprint_event,
        &trace_print_event,
        NULL
diff --combined kernel/trace/trace_stack.c
index 83a8b5b7bd35fa01a5e0302d83bf3d33d81f9557,8c3f37e2dc4356e82ce13569b351b5f28693bfe5..b20428c5efe26ba54ef2a6e83bb9ad711a9f85b5
  
  #define STACK_TRACE_ENTRIES 500
  
+ #ifdef CC_USING_FENTRY
+ # define fentry               1
+ #else
+ # define fentry               0
+ #endif
  static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
         { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
  static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
  
+ /*
+  * Reserve one entry for the passed in ip. This will allow
+  * us to remove most or all of the stack size overhead
+  * added by the stack tracer itself.
+  */
  static struct stack_trace max_stack_trace = {
-       .max_entries            = STACK_TRACE_ENTRIES,
-       .entries                = stack_dump_trace,
+       .max_entries            = STACK_TRACE_ENTRIES - 1,
+       .entries                = &stack_dump_trace[1],
  };
  
  static unsigned long max_stack_size;
@@@ -39,25 -50,34 +50,34 @@@ static DEFINE_MUTEX(stack_sysctl_mutex)
  int stack_tracer_enabled;
  static int last_stack_tracer_enabled;
  
- static inline void check_stack(void)
+ static inline void
+ check_stack(unsigned long ip, unsigned long *stack)
  {
        unsigned long this_size, flags;
        unsigned long *p, *top, *start;
+       static int tracer_frame;
+       int frame_size = ACCESS_ONCE(tracer_frame);
        int i;
  
-       this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+       this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
        this_size = THREAD_SIZE - this_size;
+       /* Remove the frame of the tracer */
+       this_size -= frame_size;
  
        if (this_size <= max_stack_size)
                return;
  
        /* we do not handle interrupt stacks yet */
-       if (!object_is_on_stack(&this_size))
+       if (!object_is_on_stack(stack))
                return;
  
        local_irq_save(flags);
        arch_spin_lock(&max_stack_lock);
  
+       /* In case another CPU set the tracer_frame on us */
+       if (unlikely(!frame_size))
+               this_size -= tracer_frame;
        /* a race could have already updated it */
        if (this_size <= max_stack_size)
                goto out;
  
        save_stack_trace(&max_stack_trace);
  
+       /*
+        * Add the passed in ip from the function tracer.
+        * Searching for this on the stack will skip over
+        * most of the overhead from the stack tracer itself.
+        */
+       stack_dump_trace[0] = ip;
+       max_stack_trace.nr_entries++;
        /*
         * Now find where in the stack these are.
         */
        i = 0;
-       start = &this_size;
+       start = stack;
        top = (unsigned long *)
                (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
  
                                found = 1;
                                /* Start the search from here */
                                start = p + 1;
+                               /*
+                                * We do not want to show the overhead
+                                * of the stack tracer stack in the
+                                * max stack. If we haven't figured
+                                * out what that is, then figure it out
+                                * now.
+                                */
+                               if (unlikely(!tracer_frame) && i == 1) {
+                                       tracer_frame = (p - stack) *
+                                               sizeof(unsigned long);
+                                       max_stack_size -= tracer_frame;
+                               }
                        }
                }
  
@@@ -113,6 -153,7 +153,7 @@@ static voi
  stack_trace_call(unsigned long ip, unsigned long parent_ip,
                 struct ftrace_ops *op, struct pt_regs *pt_regs)
  {
+       unsigned long stack;
        int cpu;
  
        preempt_disable_notrace();
        if (per_cpu(trace_active, cpu)++ != 0)
                goto out;
  
-       check_stack();
+       /*
+        * When fentry is used, the traced function does not get
+        * its stack frame set up, and we lose the parent.
+        * The ip is pretty useless because the function tracer
+        * was called before that function set up its stack frame.
+        * In this case, we use the parent ip.
+        *
+        * By adding the return address of either the parent ip
+        * or the current ip we can disregard most of the stack usage
+        * caused by the stack tracer itself.
+        *
+        * The function tracer always reports the address of where the
+        * mcount call was, but the stack will hold the return address.
+        */
+       if (fentry)
+               ip = parent_ip;
+       else
+               ip += MCOUNT_INSN_SIZE;
+       check_stack(ip, &stack);
  
   out:
        per_cpu(trace_active, cpu)--;
@@@ -322,7 -382,7 +382,7 @@@ static const struct file_operations sta
        .open = stack_trace_filter_open,
        .read = seq_read,
        .write = ftrace_filter_write,
 -      .llseek = ftrace_regex_lseek,
 +      .llseek = ftrace_filter_lseek,
        .release = ftrace_regex_release,
  };
  
@@@ -371,6 -431,8 +431,8 @@@ static __init int stack_trace_init(void
        struct dentry *d_tracer;
  
        d_tracer = tracing_init_dentry();
+       if (!d_tracer)
+               return 0;
  
        trace_create_file("stack_max_size", 0644, d_tracer,
                        &max_stack_size, &stack_max_size_fops);
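The stack tracer changes above reserve the first entry for the passed-in ip and subtract the tracer's own frame from the reported size. A short check from userspace (paths assumed, not part of this commit):

    # enable the stack tracer and read the deepest kernel stack seen so far
    echo 1 > /proc/sys/kernel/stack_tracer_enabled
    cat /sys/kernel/debug/tracing/stack_max_size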
diff --combined kernel/tracepoint.c
index 0c05a4592047c7ccfdc323c1be24af40ac90cbe7,99e7e314e451ca1adee476781225faf5657be9f9..29f26540e9c9550fd1c099512e7b20beb21ce234
@@@ -112,7 -112,8 +112,8 @@@ tracepoint_entry_add_probe(struct trace
        int nr_probes = 0;
        struct tracepoint_func *old, *new;
  
-       WARN_ON(!probe);
+       if (WARN_ON(!probe))
+               return ERR_PTR(-EINVAL);
  
        debug_print_probes(entry);
        old = entry->funcs;
@@@ -152,13 -153,18 +153,18 @@@ tracepoint_entry_remove_probe(struct tr
  
        debug_print_probes(entry);
        /* (N -> M), (N > 1, M >= 0) probes */
-       for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
-               if (!probe ||
-                   (old[nr_probes].func == probe &&
-                    old[nr_probes].data == data))
-                       nr_del++;
+       if (probe) {
+               for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
+                       if (old[nr_probes].func == probe &&
+                            old[nr_probes].data == data)
+                               nr_del++;
+               }
        }
  
+       /*
+        * If probe is NULL, then nr_probes = nr_del = 0, and then the
+        * entire entry will be removed.
+        */
        if (nr_probes - nr_del == 0) {
                /* N -> 0, (N > 1) */
                entry->funcs = NULL;
                if (new == NULL)
                        return ERR_PTR(-ENOMEM);
                for (i = 0; old[i].func; i++)
-                       if (probe &&
-                           (old[i].func != probe || old[i].data != data))
+                       if (old[i].func != probe || old[i].data != data)
                                new[j++] = old[i];
                new[nr_probes - nr_del].func = NULL;
                entry->refcount = nr_probes - nr_del;
  static struct tracepoint_entry *get_tracepoint(const char *name)
  {
        struct hlist_head *head;
 -      struct hlist_node *node;
        struct tracepoint_entry *e;
        u32 hash = jhash(name, strlen(name), 0);
  
        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 -      hlist_for_each_entry(e, node, head, hlist) {
 +      hlist_for_each_entry(e, head, hlist) {
                if (!strcmp(name, e->name))
                        return e;
        }
  static struct tracepoint_entry *add_tracepoint(const char *name)
  {
        struct hlist_head *head;
 -      struct hlist_node *node;
        struct tracepoint_entry *e;
        size_t name_len = strlen(name) + 1;
        u32 hash = jhash(name, name_len-1, 0);
  
        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 -      hlist_for_each_entry(e, node, head, hlist) {
 +      hlist_for_each_entry(e, head, hlist) {
                if (!strcmp(name, e->name)) {
                        printk(KERN_NOTICE
                                "tracepoint %s busy\n", name);