Merge tag 'trace-ring-buffer-v6.12' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <[email protected]>
           Sun, 22 Sep 2024 16:47:16 +0000 (09:47 -0700)
committer  Linus Torvalds <[email protected]>
           Sun, 22 Sep 2024 16:47:16 +0000 (09:47 -0700)
Pull ring-buffer updates from Steven Rostedt:

 - tracing/ring-buffer: persistent buffer across reboots

   This allows a tracing instance's ring buffer to stay persistent
   across reboots. This is done by adding to the kernel command line:

     trace_instance=boot_map@0x285400000:12M

   This will reserve 12 megabytes at the address 0x285400000 and then
   map the ring buffer of the tracing instance "boot_map" to that
   memory. It will appear as a normal instance in tracefs:

     /sys/kernel/tracing/instances/boot_map

   A user could enable tracing in that instance, and on reboot or
   kernel crash, if the memory is not wiped by the firmware, the trace
   in that instance will be recreated. For example, if one were
   debugging the shutdown path of a kernel reboot:

     # cd /sys/kernel/tracing
     # echo function > instances/boot_map/current_tracer
     # reboot
     [..]
     # cd /sys/kernel/tracing
     # tail instances/boot_map/trace
           swapper/0-1       [000] d..1.   164.549800: restore_boot_irq_mode <-native_machine_shutdown
           swapper/0-1       [000] d..1.   164.549801: native_restore_boot_irq_mode <-native_machine_shutdown
           swapper/0-1       [000] d..1.   164.549802: disconnect_bsp_APIC <-native_machine_shutdown
           swapper/0-1       [000] d..1.   164.549811: hpet_disable <-native_machine_shutdown
           swapper/0-1       [000] d..1.   164.549812: iommu_shutdown_noop <-native_machine_restart
           swapper/0-1       [000] d..1.   164.549813: native_machine_emergency_restart <-__do_sys_reboot
           swapper/0-1       [000] d..1.   164.549813: tboot_shutdown <-native_machine_emergency_restart
           swapper/0-1       [000] d..1.   164.549820: acpi_reboot <-native_machine_emergency_restart
           swapper/0-1       [000] d..1.   164.549821: acpi_reset <-acpi_reboot
           swapper/0-1       [000] d..1.   164.549822: acpi_os_write_port <-acpi_reboot

   On reboot, the buffer is examined to make sure it is valid. The
   validation check even steps through every event to make sure its
   meta data is correct. If any check fails, the buffer is simply
   reset, and it will be empty on boot.

 - Allow the tracing persistent boot buffer to use the "reserve_mem"
   option

   Instead of making the admin find a physical address to store the
   persistent buffer, which can be very tedious when administering
   several different machines, allow them to use the "reserve_mem"
   option, which finds a location for them. This is less reliable under
   KASLR, as loading the kernel at different locations can make the
   allocated memory inconsistent between boots. Booting with "nokaslr"
   makes reserve_mem more reliable.

 - Have the function graph tracer handle offsets from a previous boot

   The ring buffer output from a previous boot may contain different
   addresses due to KASLR. Have the function graph tracer handle these
   by applying the delta between the previous boot's and the new boot's
   address space, as in the sketch below.
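
   A minimal user-space sketch of the rebasing idea (illustrative only;
   in the kernel the saved delta lives in the trace_array, as the
   trace.c hunk below shows):

     #include <stdio.h>

     /* Rebase an address recorded in a previous boot into the current
      * boot's address space using the saved KASLR text delta. */
     static unsigned long rebase(unsigned long old_addr, long text_delta)
     {
             return old_addr + text_delta;
     }

     int main(void)
     {
             long text_delta = 0x2000000;   /* example delta between boots */

             printf("%#lx\n", rebase(0xffffffff81234560UL, text_delta));
             return 0;
     }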

 - Only reset the saved meta offset when the buffer is started or reset

   The persistent memory meta data holds the previous boot's address
   space information, so that the delta needed for function tracing can
   be calculated. This information used to be updated to the new
   address space as soon as it was read. So if the buffer was not used
   during a boot, the next reboot would calculate the delta from that
   boot instead of the boot that actually wrote the data into the ring
   buffer, and the functions would not be shown. Do not save the
   current kernel's address space information until the buffer is
   actually being recorded to.

 - Add a magic number to validate the meta data

   Add a magic number to the meta data that can also be used for
   validation. The validator of the previous buffer does not strictly
   need it, but it guards against meta data written by a different
   kernel that happens to have the same format, passes the validator,
   yet is used differently. The magic number can also serve as a
   "version" of the meta data.

 - Align user-space mapped ring buffer sub-buffers to improve TLB
   entries

   Linus mentioned that the mapped ring buffer sub-buffers were
   misaligned between the meta page and the sub-buffers, so that if the
   sub-buffers were bigger than PAGE_SIZE, the TLB could not use bigger
   entries.

 - Add a new kernel command line flag "traceoff" to disable tracing on
   boot for instances

   If tracing is enabled for a boot instance, there needs to be a way
   to disable it on boot, so that new events do not get entered into
   the ring buffer and mixed with events from a previous boot, as that
   can be confusing.
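
   For example, using the '^' flag syntax documented in the
   kernel-parameters.txt hunk below:

     trace_instance=boot_map^traceoff@0x285400000:12M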

 - Allow trace_printk() to go to other instances

   Currently, trace_printk() can only go to the top level instance.
   When debugging with a persistent buffer, it is really useful to be
   able to direct trace_printk() to that buffer, so that its output is
   available after a crash.
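
   A sketch of the runtime usage, assuming the tracefs option file added
   by this series is named "trace_printk_dest" (the option name is an
   assumption here; the boot-time "traceprintk" flag below is the
   documented interface):

     # cd /sys/kernel/tracing
     # echo 1 > instances/boot_map/options/trace_printk_dest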

 - Do not use "bin_printk()" for traces to a boot instance

   bin_printk() saves only a pointer to the printk format string in the
   ring buffer, since a reader running in the same kernel can still
   dereference it. That is not the case when the buffer comes from a
   previous boot. If trace_printk() is directed at a "persistent"
   buffer, it therefore uses the slower version that copies the printk
   format string into the buffer.

 - Add command line option to allow trace_printk() to go to an instance

   Allow the kernel command line to define which instance
   trace_printk() goes to, instead of forcing the admin to set it via
   the tracefs options on every boot.
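
   For example, combining it with the persistent boot buffer above:

     trace_instance=boot_map^traceprintk@0x285400000:12M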

 - Start a document that explains how to use tracefs to debug the kernel

 - Add some more kernel selftests to test user mapped ring buffer

* tag 'trace-ring-buffer-v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (28 commits)
  selftests/ring-buffer: Handle meta-page bigger than the system
  selftests/ring-buffer: Verify the entire meta-page padding
  tracing/Documentation: Start a document on how to debug with tracing
  tracing: Add option to set an instance to be the trace_printk destination
  tracing: Have trace_printk not use binary prints if boot buffer
  tracing: Allow trace_printk() to go to other instance buffers
  tracing: Add "traceoff" flag to boot time tracing instances
  ring-buffer: Align meta-page to sub-buffers for improved TLB usage
  ring-buffer: Add magic and struct size to boot up meta data
  ring-buffer: Don't reset persistent ring-buffer meta saved addresses
  tracing/fgraph: Have fgraph handle previous boot function addresses
  tracing: Allow boot instances to use reserve_mem boot memory
  tracing: Fix ifdef of snapshots to not prevent last_boot_info file
  ring-buffer: Use vma_pages() helper function
  tracing: Fix NULL vs IS_ERR() check in enable_instances()
  tracing: Add last boot delta offset for stack traces
  tracing: Update function tracing output for previous boot buffer
  tracing: Handle old buffer mappings for event strings and functions
  tracing/ring-buffer: Add last_boot_info file to boot instance
  ring-buffer: Save text and data locations in mapped meta data
  ...

Documentation/admin-guide/kernel-parameters.txt
kernel/trace/trace.c

index 19b71ff1168e7d20ccafd93926674fd5c0729bf7,9bb50dc78338275aa7456d13a0b53ff818541b85..bb48ae24ae69fa4d73ec91fadeddc3da0215e123
                                          allowed anymore to lift isolation
                                          requirements as needed. This option
                                          does not override iommu=pt
 -                      force_enable - Force enable the IOMMU on platforms known
 -                                     to be buggy with IOMMU enabled. Use this
 -                                     option with care.
 -                      pgtbl_v1     - Use v1 page table for DMA-API (Default).
 -                      pgtbl_v2     - Use v2 page table for DMA-API.
 -                      irtcachedis  - Disable Interrupt Remapping Table (IRT) caching.
 +                      force_enable    - Force enable the IOMMU on platforms known
 +                                        to be buggy with IOMMU enabled. Use this
 +                                        option with care.
 +                      pgtbl_v1        - Use v1 page table for DMA-API (Default).
 +                      pgtbl_v2        - Use v2 page table for DMA-API.
 +                      irtcachedis     - Disable Interrupt Remapping Table (IRT) caching.
 +                      nohugepages     - Limit page-sizes used for v1 page-tables
 +                                        to 4 KiB.
 +                      v2_pgsizes_only - Limit page-sizes used for v1 page-tables
 +                                        to 4KiB/2MiB/1GiB.
 +
  
        amd_iommu_dump= [HW,X86-64]
                        Enable AMD IOMMU driver option to dump the ACPI table
                        Format: <io>,<irq>,<mode>
                        See header of drivers/net/hamradio/baycom_ser_hdx.c.
  
 +      bdev_allow_write_mounted=
 +                      Format: <bool>
 +                      Control the ability to open a mounted block device
 +                      for writing, i.e., allow / disallow writes that bypass
 +                      the FS. This was implemented as a means to prevent
 +                      fuzzers from crashing the kernel by overwriting the
 +                      metadata underneath a mounted FS without its awareness.
 +                      This also prevents destructive formatting of mounted
 +                      filesystems by naive storage tooling that doesn't use
 +                      O_EXCL. Default is Y and can be changed through the
 +                      Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
 +
        bert_disable    [ACPI]
                        Disable BERT OS support on buggy BIOSes.
  
        ipcmni_extend   [KNL,EARLY] Extend the maximum number of unique System V
                        IPC identifiers from 32,768 to 16,777,216.
  
 +      ipe.enforce=    [IPE]
 +                      Format: <bool>
 +                      Determine whether IPE starts in permissive (0) or
 +                      enforce (1) mode. The default is enforce.
 +
 +      ipe.success_audit=
 +                      [IPE]
 +                      Format: <bool>
 +                      Start IPE with success auditing enabled, emitting
 +                      an audit event when a binary is allowed. The default
 +                      is 0.
 +
        irqaffinity=    [SMP] Set the default irq affinity mask
                        The argument is a cpu list, as described above.
  
                        Disable NUMA, Only set up a single NUMA node
                        spanning all memory.
  
 +      numa=fake=<size>[MG]
 +                      [KNL, ARM64, RISCV, X86, EARLY]
 +                      If given as a memory unit, fills all system RAM with
 +                      fake nodes of the given size, interleaved over physical
 +                      nodes.
 +
 +      numa=fake=<N>
 +                      [KNL, ARM64, RISCV, X86, EARLY]
 +                      If given as an integer, fills all system RAM with N
 +                      fake nodes interleaved over physical nodes.
 +
 +      numa=fake=<N>U
 +                      [KNL, ARM64, RISCV, X86, EARLY]
 +                      If given as an integer followed by 'U', it will
 +                      divide each physical node into N emulated nodes.
 +
        numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
                        NUMA balancing.
                        Allowed values are enable and disable
        printk.time=    Show timing data prefixed to each printk message line
                        Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
  
 +      proc_mem.force_override= [KNL]
 +                      Format: {always | ptrace | never}
 +                      Traditionally /proc/pid/mem allows memory permissions to be
 +                      overridden without restrictions. This option may be set to
 +                      restrict that. Can be one of:
 +                      - 'always': traditional behavior always allows mem overrides.
 +                      - 'ptrace': only allow mem overrides for active ptracers.
 +                      - 'never':  never allow mem overrides.
 +                      If not specified, default is the CONFIG_PROC_MEM_* choice.
 +
        processor.max_cstate=   [HW,ACPI]
                        Limit processor to maximum C-state
                        max_cstate=9 overrides any DMI blacklist limit.
                        Set maximum number of finished RCU callbacks to
                        process in one batch.
  
 +      rcutree.csd_lock_suppress_rcu_stall=    [KNL]
 +                      Do only a one-line RCU CPU stall warning when
 +                      there is an ongoing too-long CSD-lock wait.
 +
        rcutree.do_rcu_barrier= [KNL]
                        Request a call to rcu_barrier().  This is
                        throttled so that userspace tests can safely
                        Time to wait (s) after boot before inducing stall.
  
        rcutorture.stall_cpu_irqsoff= [KNL]
 -                      Disable interrupts while stalling if set.
 +                      Disable interrupts while stalling if set, but only
 +                      on the first stall in the set.
 +
 +      rcutorture.stall_cpu_repeat= [KNL]
 +                      Number of times to repeat the stall sequence,
 +                      so that rcutorture.stall_cpu_repeat=3 will result
 +                      in four stall sequences.
  
        rcutorture.stall_gp_kthread= [KNL]
                        Duration (s) of forced sleep within RCU
                        of zero will disable batching.  Batching is
                        always disabled for synchronize_rcu_tasks().
  
 -      rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
 -                      Set timeout in milliseconds RCU Tasks
 -                      Rude asynchronous callback batching for
 -                      call_rcu_tasks_rude().  A negative value
 -                      will take the default.  A value of zero will
 -                      disable batching.  Batching is always disabled
 -                      for synchronize_rcu_tasks_rude().
 -
        rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
                        Set timeout in milliseconds RCU Tasks
                        Trace asynchronous callback batching for
                        <deci-seconds>: poll all this frequency
                        0: no polling (default)
  
 +      thp_anon=       [KNL]
 +                      Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>
 +                      state is one of "always", "madvise", "never" or "inherit".
 +                      Control the default behavior of the system with respect
 +                      to anonymous transparent hugepages.
 +                      Can be used multiple times for multiple anon THP sizes.
 +                      See Documentation/admin-guide/mm/transhuge.rst for more
 +                      details.
 +
        threadirqs      [KNL,EARLY]
                        Force threading of all interrupt handlers except those
                        marked explicitly IRQF_NO_THREAD.
                        the same thing would happen if it was left off). The irq_handler_entry
                        event, and all events under the "initcall" system.
  
+                       Flags can be added to the instance to modify its behavior when it is
+                       created. The flags are separated by '^'.
+                       The available flags are:
+                           traceoff    - Have tracing disabled in the instance after it is created.
+                           traceprintk - Have trace_printk() write into this trace instance
+                                         (note, "printk" and "trace_printk" can also be used)
+                               trace_instance=foo^traceoff^traceprintk,sched,irq
+                       The flags must come before the defined events.
+                       If memory has been reserved (see memmap for x86), the instance
+                       can use that memory:
+                               memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
+                       The above will create a "boot_map" instance that uses 12
+                       megabytes of the physical memory at 0x284500000. The per-CPU
+                       buffers of that instance will be split up accordingly.
+                       Alternatively, the memory can be reserved by the reserve_mem option:
+                               reserve_mem=12M:4096:trace trace_instance=boot_map@trace
+                       This will reserve 12 megabytes at boot up with a 4096 byte alignment
+                       and place the ring buffer in this memory. Note that due to KASLR, the
+                       memory may not be at the same location each time, in which case the
+                       buffer content is not preserved.
+                       Also note that the layout of the ring buffer data may change between
+                       kernel versions; the validator will then fail and reset the ring buffer
+                       if the layout is not the same as that of the previous kernel.
+                       If the ring buffer is used for persistent bootups and has events enabled,
+                       it is recommended to disable tracing so that events from a previous boot
+                       do not mix with events of the current boot (unless you are debugging a
+                       random crash at boot up).
+                               reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
+                       See also Documentation/trace/debugging.rst
        trace_options=[option-list]
                        [FTRACE] Enable or disable tracer options at boot.
                        The option-list is a comma delimited list of options
                        it can be updated at runtime by writing to the
                        corresponding sysfs file.
  
 +      workqueue.panic_on_stall=<uint>
 +                      Panic when a workqueue stall is detected by
 +                      CONFIG_WQ_WATCHDOG. The value sets the number of stall
 +                      detections required to trigger the panic.
 +
 +                      The default is 0, which disables the panic on stall.
 +
        workqueue.cpu_intensive_thresh_us=
                        Per-cpu work items which run for longer than this
                        threshold are automatically considered CPU intensive
diff --combined kernel/trace/trace.c
index c3b2c7dfadef1a253258e62493b0fa444c0c99a6,658b40b483a340856ce9e0cbff679d58c8f023aa..b4f348b4653fa1cfe736e0eda0308cf9b6ad0914
@@@ -482,7 -482,7 +482,7 @@@ EXPORT_SYMBOL_GPL(unregister_ftrace_exp
         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
-        TRACE_ITER_HASH_PTR)
+        TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
  
  /* trace_options that are only supported by global_trace */
  #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                    \
  
  /* trace_flags that are default zero for instances */
  #define ZEROED_TRACE_FLAGS \
-       (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
+       (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
  
  /*
   * The global_trace is the descriptor that holds the top-level tracing
@@@ -500,6 -500,29 +500,29 @@@ static struct trace_array global_trace 
        .trace_flags = TRACE_DEFAULT_FLAGS,
  };
  
+ static struct trace_array *printk_trace = &global_trace;
+ static __always_inline bool printk_binsafe(struct trace_array *tr)
+ {
+       /*
+        * The binary format of traceprintk can cause a crash if used
+        * by a buffer from another boot. Force the use of the
+        * non binary version of trace_printk if the trace_printk
+        * buffer is a boot mapped ring buffer.
+        */
+       return !(tr->flags & TRACE_ARRAY_FL_BOOT);
+ }
+ static void update_printk_trace(struct trace_array *tr)
+ {
+       if (printk_trace == tr)
+               return;
+       printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
+       printk_trace = tr;
+       tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
+ }
  void trace_set_ring_buffer_expanded(struct trace_array *tr)
  {
        if (!tr)
@@@ -1117,7 -1140,7 +1140,7 @@@ EXPORT_SYMBOL_GPL(__trace_array_puts)
   */
  int __trace_puts(unsigned long ip, const char *str, int size)
  {
-       return __trace_array_puts(&global_trace, ip, str, size);
+       return __trace_array_puts(printk_trace, ip, str, size);
  }
  EXPORT_SYMBOL_GPL(__trace_puts);
  
   */
  int __trace_bputs(unsigned long ip, const char *str)
  {
+       struct trace_array *tr = READ_ONCE(printk_trace);
        struct ring_buffer_event *event;
        struct trace_buffer *buffer;
        struct bputs_entry *entry;
        int size = sizeof(struct bputs_entry);
        int ret = 0;
  
-       if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
+       if (!printk_binsafe(tr))
+               return __trace_puts(ip, str, strlen(str));
+       if (!(tr->trace_flags & TRACE_ITER_PRINTK))
                return 0;
  
        if (unlikely(tracing_selftest_running || tracing_disabled))
                return 0;
  
        trace_ctx = tracing_gen_ctx();
-       buffer = global_trace.array_buffer.buffer;
+       buffer = tr->array_buffer.buffer;
  
        ring_buffer_nest_start(buffer);
        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
        entry->str                      = str;
  
        __buffer_unlock_commit(buffer, event);
-       ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
+       ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
  
        ret = 1;
   out:
@@@ -2226,6 -2253,10 +2253,6 @@@ static __init int init_trace_selftests(
  }
  core_initcall(init_trace_selftests);
  #else
 -static inline int run_tracer_selftest(struct tracer *type)
 -{
 -      return 0;
 -}
  static inline int do_run_tracer_selftest(struct tracer *type)
  {
        return 0;
@@@ -3021,7 -3052,7 +3048,7 @@@ void trace_dump_stack(int skip
        /* Skip 1 to skip this function. */
        skip++;
  #endif
-       __ftrace_trace_stack(global_trace.array_buffer.buffer,
+       __ftrace_trace_stack(printk_trace->array_buffer.buffer,
                             tracing_gen_ctx(), skip, NULL);
  }
  EXPORT_SYMBOL_GPL(trace_dump_stack);
@@@ -3240,12 -3271,15 +3267,15 @@@ int trace_vbprintk(unsigned long ip, co
        struct trace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
        struct trace_buffer *buffer;
-       struct trace_array *tr = &global_trace;
+       struct trace_array *tr = READ_ONCE(printk_trace);
        struct bprint_entry *entry;
        unsigned int trace_ctx;
        char *tbuffer;
        int len = 0, size;
  
+       if (!printk_binsafe(tr))
+               return trace_vprintk(ip, fmt, args);
        if (unlikely(tracing_selftest_running || tracing_disabled))
                return 0;
  
@@@ -3338,7 -3372,7 +3368,7 @@@ __trace_array_vprintk(struct trace_buff
        memcpy(&entry->buf, tbuffer, len + 1);
        if (!call_filter_check_discard(call, entry, buffer, event)) {
                __buffer_unlock_commit(buffer, event);
-               ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
+               ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
        }
  
  out:
@@@ -3434,7 -3468,7 +3464,7 @@@ int trace_array_printk_buf(struct trace
        int ret;
        va_list ap;
  
-       if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
+       if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
                return 0;
  
        va_start(ap, fmt);
  __printf(2, 0)
  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
  {
-       return trace_array_vprintk(&global_trace, ip, fmt, args);
+       return trace_array_vprintk(printk_trace, ip, fmt, args);
  }
  EXPORT_SYMBOL_GPL(trace_vprintk);
  
@@@ -3667,8 -3701,11 +3697,11 @@@ static void test_can_verify(void
  void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                         va_list ap)
  {
+       long text_delta = iter->tr->text_delta;
+       long data_delta = iter->tr->data_delta;
        const char *p = fmt;
        const char *str;
+       bool good;
        int i, j;
  
        if (WARN_ON_ONCE(!fmt))
  
                j = 0;
  
-               /* We only care about %s and variants */
+               /*
+                * We only care about %s and variants
+                * as well as %p[sS] if delta is non-zero
+                */
                for (i = 0; p[i]; i++) {
                        if (i + 1 >= iter->fmt_size) {
                                /*
                                }
                                if (p[i+j] == 's')
                                        break;
+                               if (text_delta && p[i+1] == 'p' &&
+                                   ((p[i+2] == 's' || p[i+2] == 'S')))
+                                       break;
                                star = false;
                        }
                        j = 0;
                iter->fmt[i] = '\0';
                trace_seq_vprintf(&iter->seq, iter->fmt, ap);
  
+               /* Add delta to %pS pointers */
+               if (p[i+1] == 'p') {
+                       unsigned long addr;
+                       char fmt[4];
+                       fmt[0] = '%';
+                       fmt[1] = 'p';
+                       fmt[2] = p[i+2]; /* Either %ps or %pS */
+                       fmt[3] = '\0';
+                       addr = va_arg(ap, unsigned long);
+                       addr += text_delta;
+                       trace_seq_printf(&iter->seq, fmt, (void *)addr);
+                       p += i + 3;
+                       continue;
+               }
                /*
                 * If iter->seq is full, the above call no longer guarantees
                 * that ap is in sync with fmt processing, and further calls
                /* The ap now points to the string data of the %s */
                str = va_arg(ap, const char *);
  
+               good = trace_safe_str(iter, str, star, len);
+               /* Could be from the last boot */
+               if (data_delta && !good) {
+                       str += data_delta;
+                       good = trace_safe_str(iter, str, star, len);
+               }
                /*
                 * If you hit this warning, it is likely that the
                 * trace event in question used %s on a string that
                 * instead. See samples/trace_events/trace-events-sample.h
                 * for reference.
                 */
-               if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
-                             "fmt: '%s' current_buffer: '%s'",
+               if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
                              fmt, seq_buf_str(&iter->seq.seq))) {
                        int ret;
  
@@@ -3954,8 -4024,6 +4020,8 @@@ void tracing_iter_reset(struct trace_it
                        break;
                entries++;
                ring_buffer_iter_advance(buf_iter);
 +              /* This could be a big loop */
 +              cond_resched();
        }
  
        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
@@@ -4919,6 -4987,11 +4985,11 @@@ static int tracing_open(struct inode *i
  static bool
  trace_ok_for_array(struct tracer *t, struct trace_array *tr)
  {
+ #ifdef CONFIG_TRACER_SNAPSHOT
+       /* arrays with mapped buffer range do not have snapshots */
+       if (tr->range_addr_start && t->use_max_tr)
+               return false;
+ #endif
        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
  }
  
@@@ -5011,7 -5084,7 +5082,7 @@@ static int show_traces_open(struct inod
        return 0;
  }
  
- static int show_traces_release(struct inode *inode, struct file *file)
+ static int tracing_seq_release(struct inode *inode, struct file *file)
  {
        struct trace_array *tr = inode->i_private;
  
@@@ -5052,7 -5125,7 +5123,7 @@@ static const struct file_operations sho
        .open           = show_traces_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
-       .release        = show_traces_release,
+       .release        = tracing_seq_release,
  };
  
  static ssize_t
@@@ -5237,7 -5310,8 +5308,8 @@@ int trace_keep_overwrite(struct tracer 
  int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
  {
        if ((mask == TRACE_ITER_RECORD_TGID) ||
-           (mask == TRACE_ITER_RECORD_CMD))
+           (mask == TRACE_ITER_RECORD_CMD) ||
+           (mask == TRACE_ITER_TRACE_PRINTK))
                lockdep_assert_held(&event_mutex);
  
        /* do nothing if flag is already set */
                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
                        return -EINVAL;
  
+       if (mask == TRACE_ITER_TRACE_PRINTK) {
+               if (enabled) {
+                       update_printk_trace(tr);
+               } else {
+                       /*
+                        * The global_trace cannot clear this.
+                        * Its flag only gets cleared if another instance sets it.
+                        */
+                       if (printk_trace == &global_trace)
+                               return -EINVAL;
+                       /*
+                        * An instance must always have it set;
+                        * by default, that's the global_trace instance.
+                        */
+                       if (printk_trace == tr)
+                               update_printk_trace(&global_trace);
+               }
+       }
        if (enabled)
                tr->trace_flags |= mask;
        else
        return ret;
  }
  
+ static void update_last_data(struct trace_array *tr)
+ {
+       if (!tr->text_delta && !tr->data_delta)
+               return;
+       /* Clear old data */
+       tracing_reset_online_cpus(&tr->array_buffer);
+       /* Using current data now */
+       tr->text_delta = 0;
+       tr->data_delta = 0;
+ }
  
  /**
   * tracing_update_buffers - used by tracing facility to expand ring buffers
@@@ -6051,6 -6156,9 +6154,9 @@@ int tracing_update_buffers(struct trace
        int ret = 0;
  
        mutex_lock(&trace_types_lock);
+       update_last_data(tr);
        if (!tr->ring_buffer_expanded)
                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
                                                RING_BUFFER_ALL_CPUS);
@@@ -6106,6 -6214,8 +6212,8 @@@ int tracing_set_tracer(struct trace_arr
  
        mutex_lock(&trace_types_lock);
  
+       update_last_data(tr);
        if (!tr->ring_buffer_expanded) {
                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
                                                RING_BUFFER_ALL_CPUS);
@@@ -6853,6 -6963,37 +6961,37 @@@ tracing_total_entries_read(struct file 
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
+ static ssize_t
+ tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+ {
+       struct trace_array *tr = filp->private_data;
+       struct seq_buf seq;
+       char buf[64];
+       seq_buf_init(&seq, buf, 64);
+       seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
+       seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
+ }
+ static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
+ {
+       struct trace_array *tr = inode->i_private;
+       int cpu = tracing_get_cpu(inode);
+       int ret;
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
+       ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
+       if (ret < 0)
+               __trace_array_put(tr);
+       return ret;
+ }
  static ssize_t
  tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
                          size_t cnt, loff_t *ppos)
@@@ -7429,6 -7570,13 +7568,13 @@@ static const struct file_operations tra
        .release        = tracing_release_generic_tr,
  };
  
+ static const struct file_operations tracing_buffer_meta_fops = {
+       .open           = tracing_buffer_meta_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = tracing_seq_release,
+ };
  static const struct file_operations tracing_total_entries_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_total_entries_read,
@@@ -7469,6 -7617,13 +7615,13 @@@ static const struct file_operations tra
        .release        = tracing_single_release_tr,
  };
  
+ static const struct file_operations last_boot_fops = {
+       .open           = tracing_open_generic_tr,
+       .read           = tracing_last_boot_read,
+       .llseek         = generic_file_llseek,
+       .release        = tracing_release_generic_tr,
+ };
  #ifdef CONFIG_TRACER_SNAPSHOT
  static const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
@@@ -7954,7 -8109,7 +8107,7 @@@ tracing_buffers_read(struct file *filp
        trace_access_unlock(iter->cpu_file);
  
        if (ret < 0) {
 -              if (trace_empty(iter)) {
 +              if (trace_empty(iter) && !iter->closed) {
                        if ((filp->f_flags & O_NONBLOCK))
                                return -EAGAIN;
  
@@@ -8661,12 -8816,17 +8814,17 @@@ tracing_init_tracefs_percpu(struct trac
        trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
                                tr, cpu, &tracing_entries_fops);
  
+       if (tr->range_addr_start)
+               trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
+                                     tr, cpu, &tracing_buffer_meta_fops);
  #ifdef CONFIG_TRACER_SNAPSHOT
-       trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
-                               tr, cpu, &snapshot_fops);
+       if (!tr->range_addr_start) {
+               trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
+                                     tr, cpu, &snapshot_fops);
  
-       trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
-                               tr, cpu, &snapshot_raw_fops);
+               trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
+                                     tr, cpu, &snapshot_raw_fops);
+       }
  #endif
  }
  
@@@ -9203,7 -9363,21 +9361,21 @@@ allocate_trace_buffer(struct trace_arra
  
        buf->tr = tr;
  
-       buf->buffer = ring_buffer_alloc(size, rb_flags);
+       if (tr->range_addr_start && tr->range_addr_size) {
+               buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
+                                                     tr->range_addr_start,
+                                                     tr->range_addr_size);
+               ring_buffer_last_boot_delta(buf->buffer,
+                                           &tr->text_delta, &tr->data_delta);
+               /*
+                * This is basically the same as a mapped buffer,
+                * with the same restrictions.
+                */
+               tr->mapped++;
+       } else {
+               buf->buffer = ring_buffer_alloc(size, rb_flags);
+       }
        if (!buf->buffer)
                return -ENOMEM;
  
@@@ -9240,6 -9414,10 +9412,10 @@@ static int allocate_trace_buffers(struc
                return ret;
  
  #ifdef CONFIG_TRACER_MAX_TRACE
+       /* Fixed (boot mapped) buffer trace arrays do not have snapshot buffers */
+       if (tr->range_addr_start)
+               return 0;
        ret = allocate_trace_buffer(tr, &tr->max_buffer,
                                    allocate_snapshot ? size : 1);
        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
@@@ -9340,7 -9518,9 +9516,9 @@@ static int trace_array_create_dir(struc
  }
  
  static struct trace_array *
- trace_array_create_systems(const char *name, const char *systems)
+ trace_array_create_systems(const char *name, const char *systems,
+                          unsigned long range_addr_start,
+                          unsigned long range_addr_size)
  {
        struct trace_array *tr;
        int ret;
                        goto out_free_tr;
        }
  
+       /* Only for boot up memory mapped ring buffers */
+       tr->range_addr_start = range_addr_start;
+       tr->range_addr_size = range_addr_size;
        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
  
        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
  
  static struct trace_array *trace_array_create(const char *name)
  {
-       return trace_array_create_systems(name, NULL);
+       return trace_array_create_systems(name, NULL, 0, 0);
  }
  
  static int instance_mkdir(const char *name)
@@@ -9448,6 -9632,31 +9630,31 @@@ out_unlock
        return ret;
  }
  
+ static u64 map_pages(u64 start, u64 size)
+ {
+       struct page **pages;
+       phys_addr_t page_start;
+       unsigned int page_count;
+       unsigned int i;
+       void *vaddr;
+       page_count = DIV_ROUND_UP(size, PAGE_SIZE);
+       page_start = start;
+       pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+       if (!pages)
+               return 0;
+       for (i = 0; i < page_count; i++) {
+               phys_addr_t addr = page_start + i * PAGE_SIZE;
+               pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+       }
+       vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
+       kfree(pages);
+       return (u64)(unsigned long)vaddr;
+ }
  /**
   * trace_array_get_by_name - Create/Lookup a trace array, given its name.
   * @name: The name of the trace array to be looked up/created.
@@@ -9477,7 -9686,7 +9684,7 @@@ struct trace_array *trace_array_get_by_
                        goto out_unlock;
        }
  
-       tr = trace_array_create_systems(name, systems);
+       tr = trace_array_create_systems(name, systems, 0, 0);
  
        if (IS_ERR(tr))
                tr = NULL;
@@@ -9507,6 -9716,9 +9714,9 @@@ static int __remove_instance(struct tra
                        set_tracer_flag(tr, 1 << i, 0);
        }
  
+       if (printk_trace == tr)
+               update_printk_trace(&global_trace);
        tracing_set_nop(tr);
        clear_ftrace_function_probes(tr);
        event_trace_del_tracer(tr);
@@@ -9669,10 -9881,15 +9879,15 @@@ init_tracer_tracefs(struct trace_array 
        if (ftrace_create_function_files(tr, d_tracer))
                MEM_FAIL(1, "Could not allocate function filter files");
  
+       if (tr->range_addr_start) {
+               trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
+                                 tr, &last_boot_fops);
  #ifdef CONFIG_TRACER_SNAPSHOT
-       trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
-                         tr, &snapshot_fops);
+       } else {
+               trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
+                                 tr, &snapshot_fops);
  #endif
+       }
  
        trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
                          tr, &tracing_err_log_fops);
@@@ -10292,6 -10509,7 +10507,7 @@@ __init static void enable_instances(voi
  {
        struct trace_array *tr;
        char *curr_str;
+       char *name;
        char *str;
        char *tok;
  
        str = boot_instance_info;
  
        while ((curr_str = strsep(&str, "\t"))) {
+               phys_addr_t start = 0;
+               phys_addr_t size = 0;
+               unsigned long addr = 0;
+               bool traceprintk = false;
+               bool traceoff = false;
+               char *flag_delim;
+               char *addr_delim;
  
                tok = strsep(&curr_str, ",");
  
-               if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
-                       do_allocate_snapshot(tok);
+               flag_delim = strchr(tok, '^');
+               addr_delim = strchr(tok, '@');
  
-               tr = trace_array_get_by_name(tok, NULL);
-               if (!tr) {
-                       pr_warn("Failed to create instance buffer %s\n", curr_str);
+               if (addr_delim)
+                       *addr_delim++ = '\0';
+               if (flag_delim)
+                       *flag_delim++ = '\0';
+               name = tok;
+               if (flag_delim) {
+                       char *flag;
+                       while ((flag = strsep(&flag_delim, "^"))) {
+                               if (strcmp(flag, "traceoff") == 0) {
+                                       traceoff = true;
+                               } else if ((strcmp(flag, "printk") == 0) ||
+                                          (strcmp(flag, "traceprintk") == 0) ||
+                                          (strcmp(flag, "trace_printk") == 0)) {
+                                       traceprintk = true;
+                               } else {
+                                       pr_info("Tracing: Invalid instance flag '%s' for %s\n",
+                                               flag, name);
+                               }
+                       }
+               }
+               tok = addr_delim;
+               if (tok && isdigit(*tok)) {
+                       start = memparse(tok, &tok);
+                       if (!start) {
+                               pr_warn("Tracing: Invalid boot instance address for %s\n",
+                                       name);
+                               continue;
+                       }
+                       if (*tok != ':') {
+                               pr_warn("Tracing: No size specified for instance %s\n", name);
+                               continue;
+                       }
+                       tok++;
+                       size = memparse(tok, &tok);
+                       if (!size) {
+                               pr_warn("Tracing: Invalid boot instance size for %s\n",
+                                       name);
+                               continue;
+                       }
+               } else if (tok) {
+                       if (!reserve_mem_find_by_name(tok, &start, &size)) {
+                               start = 0;
+                               pr_warn("Failed to map boot instance %s to %s\n", name, tok);
+                               continue;
+                       }
+               }
+               if (start) {
+                       addr = map_pages(start, size);
+                       if (addr) {
+                               pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
+                                       name, &start, (unsigned long)size);
+                       } else {
+                               pr_warn("Tracing: Failed to map boot instance %s\n", name);
+                               continue;
+                       }
+               } else {
+                       /* Only non mapped buffers have snapshot buffers */
+                       if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+                               do_allocate_snapshot(name);
+               }
+               tr = trace_array_create_systems(name, NULL, addr, size);
+               if (IS_ERR(tr)) {
+                       pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
                        continue;
                }
-               /* Allow user space to delete it */
-               trace_array_put(tr);
+               if (traceoff)
+                       tracer_tracing_off(tr);
+               if (traceprintk)
+                       update_printk_trace(tr);
+               /*
+                * If start is set, then this is a mapped buffer, and
+                * cannot be deleted by user space, so keep the reference
+                * to it.
+                */
+               if (start)
+                       tr->flags |= TRACE_ARRAY_FL_BOOT;
+               else
+                       trace_array_put(tr);
  
                while ((tok = strsep(&curr_str, ","))) {
                        early_enable_events(tr, tok, true);