kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurs.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and will be set to zero if the initialization
112  * of the tracer is successful; that is the only place that sets
113  * it back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  * Set it to an instance name to dump that specific trace instance.
134  * Dumping multiple instances is also supported; instances are separated
135  * by commas.
136  */
137 /* Defaults to the string "0", which means disabled */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149
150 union trace_eval_map_item;
151
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * than "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160
161 static DEFINE_MUTEX(trace_eval_mutex);
162
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
175
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195
196 static int __init set_cmdline_ftrace(char *str)
197 {
198         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199         default_bootup_tracer = bootup_tracer_buf;
200         /* We are using ftrace early, expand it */
201         trace_set_ring_buffer_expanded(NULL);
202         return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205
206 int ftrace_dump_on_oops_enabled(void)
207 {
208         if (!strcmp("0", ftrace_dump_on_oops))
209                 return 0;
210         else
211                 return 1;
212 }
213
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216         if (!*str) {
217                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218                 return 1;
219         }
220
221         if (*str == ',') {
222                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223                 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224                 return 1;
225         }
226
227         if (*str++ == '=') {
228                 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229                 return 1;
230         }
231
232         return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
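/*
 * Examples of the forms parsed above (illustrative values; the instance
 * names are hypothetical):
 *
 *      ftrace_dump_on_oops             -> "1"     (dump buffers of all CPUs)
 *      ftrace_dump_on_oops=2           -> "2"     (dump only the oops CPU)
 *      ftrace_dump_on_oops=foo,bar     -> dump the "foo" and "bar" instances
 *      ftrace_dump_on_oops,foo         -> "1,foo" (all CPUs plus instance "foo")
 */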
235
236 static int __init stop_trace_on_warning(char *str)
237 {
238         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239                 __disable_trace_on_warning = 1;
240         return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243
244 static int __init boot_alloc_snapshot(char *str)
245 {
246         char *slot = boot_snapshot_info + boot_snapshot_index;
247         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248         int ret;
249
250         if (str[0] == '=') {
251                 str++;
252                 if (strlen(str) >= left)
253                         return -1;
254
255                 ret = snprintf(slot, left, "%s\t", str);
256                 boot_snapshot_index += ret;
257         } else {
258                 allocate_snapshot = true;
259                 /* We also need the main ring buffer expanded */
260                 trace_set_ring_buffer_expanded(NULL);
261         }
262         return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265
266
267 static int __init boot_snapshot(char *str)
268 {
269         snapshot_at_boot = true;
270         boot_alloc_snapshot(str);
271         return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274
275
276 static int __init boot_instance(char *str)
277 {
278         char *slot = boot_instance_info + boot_instance_index;
279         int left = sizeof(boot_instance_info) - boot_instance_index;
280         int ret;
281
282         if (strlen(str) >= left)
283                 return -1;
284
285         ret = snprintf(slot, left, "%s\t", str);
286         boot_instance_index += ret;
287
288         return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291
292
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294
295 static int __init set_trace_boot_options(char *str)
296 {
297         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298         return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304
305 static int __init set_trace_boot_clock(char *str)
306 {
307         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308         trace_boot_clock = trace_boot_clock_buf;
309         return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312
313 static int __init set_tracepoint_printk(char *str)
314 {
315         /* Ignore the "tp_printk_stop_on_boot" param */
316         if (*str == '_')
317                 return 0;
318
319         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320                 tracepoint_printk = 1;
321         return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327         tracepoint_printk_stop_on_boot = true;
328         return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331
332 unsigned long long ns2usecs(u64 nsec)
333 {
334         nsec += 500;
335         do_div(nsec, 1000);
336         return nsec;
337 }
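/*
 * Worked example of the rounding above: ns2usecs(1499) computes
 * (1499 + 500) / 1000 = 1 usec, while ns2usecs(1500) computes
 * (1500 + 500) / 1000 = 2 usecs, i.e. values are rounded to the
 * nearest microsecond with halves rounding up.
 */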
338
339 static void
340 trace_process_export(struct trace_export *export,
341                struct ring_buffer_event *event, int flag)
342 {
343         struct trace_entry *entry;
344         unsigned int size = 0;
345
346         if (export->flags & flag) {
347                 entry = ring_buffer_event_data(event);
348                 size = ring_buffer_event_length(event);
349                 export->write(export, entry, size);
350         }
351 }
352
353 static DEFINE_MUTEX(ftrace_export_lock);
354
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363         if (export->flags & TRACE_EXPORT_FUNCTION)
364                 static_branch_inc(&trace_function_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_EVENT)
367                 static_branch_inc(&trace_event_exports_enabled);
368
369         if (export->flags & TRACE_EXPORT_MARKER)
370                 static_branch_inc(&trace_marker_exports_enabled);
371 }
372
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375         if (export->flags & TRACE_EXPORT_FUNCTION)
376                 static_branch_dec(&trace_function_exports_enabled);
377
378         if (export->flags & TRACE_EXPORT_EVENT)
379                 static_branch_dec(&trace_event_exports_enabled);
380
381         if (export->flags & TRACE_EXPORT_MARKER)
382                 static_branch_dec(&trace_marker_exports_enabled);
383 }
384
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387         struct trace_export *export;
388
389         preempt_disable_notrace();
390
391         export = rcu_dereference_raw_check(ftrace_exports_list);
392         while (export) {
393                 trace_process_export(export, event, flag);
394                 export = rcu_dereference_raw_check(export->next);
395         }
396
397         preempt_enable_notrace();
398 }
399
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403         rcu_assign_pointer(export->next, *list);
404         /*
405          * We are adding export to the list, but another
406          * CPU might be walking that list. We need to make sure
407          * the export->next pointer is valid before another CPU sees
408          * the export pointer added to the list.
409          */
410         rcu_assign_pointer(*list, export);
411 }
412
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416         struct trace_export **p;
417
418         for (p = list; *p != NULL; p = &(*p)->next)
419                 if (*p == export)
420                         break;
421
422         if (*p != export)
423                 return -1;
424
425         rcu_assign_pointer(*p, (*p)->next);
426
427         return 0;
428 }
429
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433         ftrace_exports_enable(export);
434
435         add_trace_export(list, export);
436 }
437
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441         int ret;
442
443         ret = rm_trace_export(list, export);
444         ftrace_exports_disable(export);
445
446         return ret;
447 }
448
449 int register_ftrace_export(struct trace_export *export)
450 {
451         if (WARN_ON_ONCE(!export->write))
452                 return -1;
453
454         mutex_lock(&ftrace_export_lock);
455
456         add_ftrace_export(&ftrace_exports_list, export);
457
458         mutex_unlock(&ftrace_export_lock);
459
460         return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466         int ret;
467
468         mutex_lock(&ftrace_export_lock);
469
470         ret = rm_ftrace_export(&ftrace_exports_list, export);
471
472         mutex_unlock(&ftrace_export_lock);
473
474         return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
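/*
 * Illustrative sketch of a minimal trace_export user. The callback body
 * and the flag choice are hypothetical; the exact write() prototype is
 * the one declared in include/linux/trace.h.
 */
static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int len)
{
        /* Forward the raw event bytes to some transport (sketch only). */
}

static struct trace_export example_export = {
        .write  = example_export_write,
        .flags  = TRACE_EXPORT_EVENT,
};

/*
 * Typical pairing:
 *      register_ftrace_export(&example_export);
 *      ...
 *      unregister_ftrace_export(&example_export);
 */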
477
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS                                             \
480         (FUNCTION_DEFAULT_FLAGS |                                       \
481          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
482          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
483          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
484          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
485          TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
486
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
489                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
494
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500         .trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502
503 static struct trace_array *printk_trace = &global_trace;
504
505 static __always_inline bool printk_binsafe(struct trace_array *tr)
506 {
507         /*
508          * The binary format of traceprintk can cause a crash if used
509          * by a buffer from another boot. Force the use of the
510          * non binary version of trace_printk if the trace_printk
511          * buffer is a boot mapped ring buffer.
512          */
513         return !(tr->flags & TRACE_ARRAY_FL_BOOT);
514 }
515
516 static void update_printk_trace(struct trace_array *tr)
517 {
518         if (printk_trace == tr)
519                 return;
520
521         printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
522         printk_trace = tr;
523         tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
524 }
525
526 void trace_set_ring_buffer_expanded(struct trace_array *tr)
527 {
528         if (!tr)
529                 tr = &global_trace;
530         tr->ring_buffer_expanded = true;
531 }
532
533 LIST_HEAD(ftrace_trace_arrays);
534
535 int trace_array_get(struct trace_array *this_tr)
536 {
537         struct trace_array *tr;
538         int ret = -ENODEV;
539
540         mutex_lock(&trace_types_lock);
541         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
542                 if (tr == this_tr) {
543                         tr->ref++;
544                         ret = 0;
545                         break;
546                 }
547         }
548         mutex_unlock(&trace_types_lock);
549
550         return ret;
551 }
552
553 static void __trace_array_put(struct trace_array *this_tr)
554 {
555         WARN_ON(!this_tr->ref);
556         this_tr->ref--;
557 }
558
559 /**
560  * trace_array_put - Decrement the reference counter for this trace array.
561  * @this_tr : pointer to the trace array
562  *
563  * NOTE: Use this when we no longer need the trace array returned by
564  * trace_array_get_by_name(). This ensures the trace array can be later
565  * destroyed.
566  *
567  */
568 void trace_array_put(struct trace_array *this_tr)
569 {
570         if (!this_tr)
571                 return;
572
573         mutex_lock(&trace_types_lock);
574         __trace_array_put(this_tr);
575         mutex_unlock(&trace_types_lock);
576 }
577 EXPORT_SYMBOL_GPL(trace_array_put);
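/*
 * Illustrative sketch of the get/put pairing: hold a reference while an
 * instance is in use so it cannot be destroyed underneath us. The
 * example_use_instance() name is hypothetical.
 */
static void example_use_instance(struct trace_array *tr)
{
        if (trace_array_get(tr) < 0)
                return;                 /* instance no longer registered */

        /* ... use @tr while holding the reference ... */

        trace_array_put(tr);            /* allow the instance to be destroyed again */
}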
578
579 int tracing_check_open_get_tr(struct trace_array *tr)
580 {
581         int ret;
582
583         ret = security_locked_down(LOCKDOWN_TRACEFS);
584         if (ret)
585                 return ret;
586
587         if (tracing_disabled)
588                 return -ENODEV;
589
590         if (tr && trace_array_get(tr) < 0)
591                 return -ENODEV;
592
593         return 0;
594 }
595
596 /**
597  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
598  * @filtered_pids: The list of pids to check
599  * @search_pid: The PID to find in @filtered_pids
600  *
601  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
602  */
603 bool
604 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
605 {
606         return trace_pid_list_is_set(filtered_pids, search_pid);
607 }
608
609 /**
610  * trace_ignore_this_task - should a task be ignored for tracing
611  * @filtered_pids: The list of pids to check
612  * @filtered_no_pids: The list of pids not to be traced
613  * @task: The task that should be ignored if not filtered
614  *
615  * Checks if @task should be traced or not from @filtered_pids.
616  * Returns true if @task should *NOT* be traced.
617  * Returns false if @task should be traced.
618  */
619 bool
620 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
621                        struct trace_pid_list *filtered_no_pids,
622                        struct task_struct *task)
623 {
624         /*
625          * If filtered_no_pids is not empty, and the task's pid is listed
626          * in filtered_no_pids, then return true.
627          * Otherwise, if filtered_pids is empty, that means we can
628          * trace all tasks. If it has content, then only trace pids
629          * within filtered_pids.
630          */
631
632         return (filtered_pids &&
633                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
634                 (filtered_no_pids &&
635                  trace_find_filtered_pid(filtered_no_pids, task->pid));
636 }
637
638 /**
639  * trace_filter_add_remove_task - Add or remove a task from a pid_list
640  * @pid_list: The list to modify
641  * @self: The current task for fork or NULL for exit
642  * @task: The task to add or remove
643  *
644  * If adding a task, if @self is defined, the task is only added if @self
645  * is also included in @pid_list. This happens on fork and tasks should
646  * only be added when the parent is listed. If @self is NULL, then the
647  * @task pid will be removed from the list, which would happen on exit
648  * of a task.
649  */
650 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
651                                   struct task_struct *self,
652                                   struct task_struct *task)
653 {
654         if (!pid_list)
655                 return;
656
657         /* For forks, we only add if the forking task is listed */
658         if (self) {
659                 if (!trace_find_filtered_pid(pid_list, self->pid))
660                         return;
661         }
662
663         /* "self" is set for forks, and NULL for exits */
664         if (self)
665                 trace_pid_list_set(pid_list, task->pid);
666         else
667                 trace_pid_list_clear(pid_list, task->pid);
668 }
669
670 /**
671  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
672  * @pid_list: The pid list to show
673  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
674  * @pos: The position of the file
675  *
676  * This is used by the seq_file "next" operation to iterate the pids
677  * listed in a trace_pid_list structure.
678  *
679  * Returns the pid+1 as we want to display pid of zero, but NULL would
680  * stop the iteration.
681  */
682 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
683 {
684         long pid = (unsigned long)v;
685         unsigned int next;
686
687         (*pos)++;
688
689         /* pid already is +1 of the actual previous bit */
690         if (trace_pid_list_next(pid_list, pid, &next) < 0)
691                 return NULL;
692
693         pid = next;
694
695         /* Return pid + 1 to allow zero to be represented */
696         return (void *)(pid + 1);
697 }
698
699 /**
700  * trace_pid_start - Used for seq_file to start reading pid lists
701  * @pid_list: The pid list to show
702  * @pos: The position of the file
703  *
704  * This is used by seq_file "start" operation to start the iteration
705  * of listing pids.
706  *
707  * Returns the pid+1 as we want to display pid of zero, but NULL would
708  * stop the iteration.
709  */
710 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
711 {
712         unsigned long pid;
713         unsigned int first;
714         loff_t l = 0;
715
716         if (trace_pid_list_first(pid_list, &first) < 0)
717                 return NULL;
718
719         pid = first;
720
721         /* Return pid + 1 so that zero can be the exit value */
722         for (pid++; pid && l < *pos;
723              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
724                 ;
725         return (void *)pid;
726 }
727
728 /**
729  * trace_pid_show - show the current pid in seq_file processing
730  * @m: The seq_file structure to write into
731  * @v: A void pointer of the pid (+1) value to display
732  *
733  * Can be directly used by seq_file operations to display the current
734  * pid value.
735  */
736 int trace_pid_show(struct seq_file *m, void *v)
737 {
738         unsigned long pid = (unsigned long)v - 1;
739
740         seq_printf(m, "%lu\n", pid);
741         return 0;
742 }
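/*
 * Illustrative sketch of how the three helpers above slot into a
 * seq_file. The example_* names and the use of m->private to carry the
 * pid list are hypothetical; the real users (e.g. the event and
 * function pid filters) fetch their lists under RCU.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;   /* assumed setup */

        return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        return trace_pid_next(m->private, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
        .start  = example_pids_start,
        .next   = example_pids_next,
        .stop   = example_pids_stop,
        .show   = trace_pid_show,
};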
743
744 /* 128 should be much more than enough */
745 #define PID_BUF_SIZE            127
746
747 int trace_pid_write(struct trace_pid_list *filtered_pids,
748                     struct trace_pid_list **new_pid_list,
749                     const char __user *ubuf, size_t cnt)
750 {
751         struct trace_pid_list *pid_list;
752         struct trace_parser parser;
753         unsigned long val;
754         int nr_pids = 0;
755         ssize_t read = 0;
756         ssize_t ret;
757         loff_t pos;
758         pid_t pid;
759
760         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
761                 return -ENOMEM;
762
763         /*
764          * Always recreate a new array. The write is an all or nothing
765          * operation. Always create a new array when adding new pids by
766          * the user. If the operation fails, then the current list is
767          * not modified.
768          */
769         pid_list = trace_pid_list_alloc();
770         if (!pid_list) {
771                 trace_parser_put(&parser);
772                 return -ENOMEM;
773         }
774
775         if (filtered_pids) {
776                 /* copy the current bits to the new max */
777                 ret = trace_pid_list_first(filtered_pids, &pid);
778                 while (!ret) {
779                         trace_pid_list_set(pid_list, pid);
780                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
781                         nr_pids++;
782                 }
783         }
784
785         ret = 0;
786         while (cnt > 0) {
787
788                 pos = 0;
789
790                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
791                 if (ret < 0)
792                         break;
793
794                 read += ret;
795                 ubuf += ret;
796                 cnt -= ret;
797
798                 if (!trace_parser_loaded(&parser))
799                         break;
800
801                 ret = -EINVAL;
802                 if (kstrtoul(parser.buffer, 0, &val))
803                         break;
804
805                 pid = (pid_t)val;
806
807                 if (trace_pid_list_set(pid_list, pid) < 0) {
808                         ret = -1;
809                         break;
810                 }
811                 nr_pids++;
812
813                 trace_parser_clear(&parser);
814                 ret = 0;
815         }
816         trace_parser_put(&parser);
817
818         if (ret < 0) {
819                 trace_pid_list_free(pid_list);
820                 return ret;
821         }
822
823         if (!nr_pids) {
824                 /* Cleared the list of pids */
825                 trace_pid_list_free(pid_list);
826                 pid_list = NULL;
827         }
828
829         *new_pid_list = pid_list;
830
831         return read;
832 }
833
834 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
835 {
836         u64 ts;
837
838         /* Early boot up does not have a buffer yet */
839         if (!buf->buffer)
840                 return trace_clock_local();
841
842         ts = ring_buffer_time_stamp(buf->buffer);
843         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
844
845         return ts;
846 }
847
848 u64 ftrace_now(int cpu)
849 {
850         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
851 }
852
853 /**
854  * tracing_is_enabled - Show if global_trace has been enabled
855  *
856  * Shows if the global trace has been enabled or not. It uses the
857  * Shows if the global trace has been enabled or not. It uses the
858  * mirror flag "buffer_disabled", which is meant for fast paths such as
859  * the irqsoff tracer. But it may be inaccurate due to races. If you
860  * slower, but accurate.
861  */
862 int tracing_is_enabled(void)
863 {
864         /*
865          * For quick access (irqsoff uses this in fast path), just
866          * return the mirror variable of the state of the ring buffer.
867          * It's a little racy, but we don't really care.
868          */
869         smp_rmb();
870         return !global_trace.buffer_disabled;
871 }
872
873 /*
874  * trace_buf_size is the size in bytes that is allocated
875  * for a buffer. Note, the number of bytes is always rounded
876  * to page size.
877  *
878  * This number is purposely set to a low number of 16384.
879  * If a dump on oops happens, it is much appreciated not to have
880  * to wait for all that output. In any case, this is configurable
881  * at both boot time and run time.
882  */
883 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
884
885 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
886
887 /* trace_types holds a link list of available tracers. */
888 static struct tracer            *trace_types __read_mostly;
889
890 /*
891  * trace_types_lock is used to protect the trace_types list.
892  */
893 DEFINE_MUTEX(trace_types_lock);
894
895 /*
896  * serialize the access of the ring buffer
897  *
898  * The ring buffer serializes readers, but that is only low-level protection.
899  * The validity of the events (returned by ring_buffer_peek(), etc.)
900  * is not protected by the ring buffer.
901  *
902  * The content of events may become garbage if we allow another process to
903  * consume these events concurrently:
904  *   A) the page of the consumed events may become a normal page
905  *      (not a reader page) in the ring buffer, and this page will be
906  *      rewritten by the events producer.
907  *   B) The page of the consumed events may become a page for splice_read,
908  *      and this page will be returned to the system.
909  *
910  * These primitives allow multiple processes to access different CPU ring
911  * buffers concurrently.
912  *
913  * These primitives don't distinguish read-only and read-consume access.
914  * Multiple read-only accesses are also serialized.
915  */
916
917 #ifdef CONFIG_SMP
918 static DECLARE_RWSEM(all_cpu_access_lock);
919 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
920
921 static inline void trace_access_lock(int cpu)
922 {
923         if (cpu == RING_BUFFER_ALL_CPUS) {
924                 /* gain it for accessing the whole ring buffer. */
925                 down_write(&all_cpu_access_lock);
926         } else {
927                 /* gain it for accessing a cpu ring buffer. */
928
929                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
930                 down_read(&all_cpu_access_lock);
931
932                 /* Secondly block other access to this @cpu ring buffer. */
933                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
934         }
935 }
936
937 static inline void trace_access_unlock(int cpu)
938 {
939         if (cpu == RING_BUFFER_ALL_CPUS) {
940                 up_write(&all_cpu_access_lock);
941         } else {
942                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
943                 up_read(&all_cpu_access_lock);
944         }
945 }
946
947 static inline void trace_access_lock_init(void)
948 {
949         int cpu;
950
951         for_each_possible_cpu(cpu)
952                 mutex_init(&per_cpu(cpu_access_lock, cpu));
953 }
954
955 #else
956
957 static DEFINE_MUTEX(access_lock);
958
959 static inline void trace_access_lock(int cpu)
960 {
961         (void)cpu;
962         mutex_lock(&access_lock);
963 }
964
965 static inline void trace_access_unlock(int cpu)
966 {
967         (void)cpu;
968         mutex_unlock(&access_lock);
969 }
970
971 static inline void trace_access_lock_init(void)
972 {
973 }
974
975 #endif
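/*
 * Illustrative sketch of the intended pairing: take the per-cpu (or
 * global) access lock around any consuming read of the buffer. The
 * loop body is elided and the function name is hypothetical.
 */
static void example_consume_cpu(struct trace_buffer *buffer, int cpu)
{
        trace_access_lock(cpu);
        /* ... consume events from @cpu's buffer, e.g. via ring_buffer_consume() ... */
        trace_access_unlock(cpu);
}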
976
977 #ifdef CONFIG_STACKTRACE
978 static void __ftrace_trace_stack(struct trace_array *tr,
979                                  struct trace_buffer *buffer,
980                                  unsigned int trace_ctx,
981                                  int skip, struct pt_regs *regs);
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983                                       struct trace_buffer *buffer,
984                                       unsigned int trace_ctx,
985                                       int skip, struct pt_regs *regs);
986
987 #else
988 static inline void __ftrace_trace_stack(struct trace_array *tr,
989                                         struct trace_buffer *buffer,
990                                         unsigned int trace_ctx,
991                                         int skip, struct pt_regs *regs)
992 {
993 }
994 static inline void ftrace_trace_stack(struct trace_array *tr,
995                                       struct trace_buffer *buffer,
996                                       unsigned long trace_ctx,
997                                       int skip, struct pt_regs *regs)
998 {
999 }
1000
1001 #endif
1002
1003 static __always_inline void
1004 trace_event_setup(struct ring_buffer_event *event,
1005                   int type, unsigned int trace_ctx)
1006 {
1007         struct trace_entry *ent = ring_buffer_event_data(event);
1008
1009         tracing_generic_entry_update(ent, type, trace_ctx);
1010 }
1011
1012 static __always_inline struct ring_buffer_event *
1013 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1014                           int type,
1015                           unsigned long len,
1016                           unsigned int trace_ctx)
1017 {
1018         struct ring_buffer_event *event;
1019
1020         event = ring_buffer_lock_reserve(buffer, len);
1021         if (event != NULL)
1022                 trace_event_setup(event, type, trace_ctx);
1023
1024         return event;
1025 }
1026
1027 void tracer_tracing_on(struct trace_array *tr)
1028 {
1029         if (tr->array_buffer.buffer)
1030                 ring_buffer_record_on(tr->array_buffer.buffer);
1031         /*
1032          * This flag is looked at when buffers haven't been allocated
1033          * yet, or by some tracers (like irqsoff), that just want to
1034          * know if the ring buffer has been disabled, but it can handle
1035          * races where it gets disabled but we still do a record.
1036          * As the check is in the fast path of the tracers, it is more
1037          * important to be fast than accurate.
1038          */
1039         tr->buffer_disabled = 0;
1040         /* Make the flag seen by readers */
1041         smp_wmb();
1042 }
1043
1044 /**
1045  * tracing_on - enable tracing buffers
1046  *
1047  * This function enables tracing buffers that may have been
1048  * disabled with tracing_off.
1049  */
1050 void tracing_on(void)
1051 {
1052         tracer_tracing_on(&global_trace);
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_on);
1055
1056
1057 static __always_inline void
1058 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1059 {
1060         __this_cpu_write(trace_taskinfo_save, true);
1061
1062         /* If this is the temp buffer, we need to commit fully */
1063         if (this_cpu_read(trace_buffered_event) == event) {
1064                 /* Length is in event->array[0] */
1065                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1066                 /* Release the temp buffer */
1067                 this_cpu_dec(trace_buffered_event_cnt);
1068                 /* ring_buffer_unlock_commit() enables preemption */
1069                 preempt_enable_notrace();
1070         } else
1071                 ring_buffer_unlock_commit(buffer);
1072 }
1073
1074 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1075                        const char *str, int size)
1076 {
1077         struct ring_buffer_event *event;
1078         struct trace_buffer *buffer;
1079         struct print_entry *entry;
1080         unsigned int trace_ctx;
1081         int alloc;
1082
1083         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1084                 return 0;
1085
1086         if (unlikely(tracing_selftest_running && tr == &global_trace))
1087                 return 0;
1088
1089         if (unlikely(tracing_disabled))
1090                 return 0;
1091
1092         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1093
1094         trace_ctx = tracing_gen_ctx();
1095         buffer = tr->array_buffer.buffer;
1096         ring_buffer_nest_start(buffer);
1097         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1098                                             trace_ctx);
1099         if (!event) {
1100                 size = 0;
1101                 goto out;
1102         }
1103
1104         entry = ring_buffer_event_data(event);
1105         entry->ip = ip;
1106
1107         memcpy(&entry->buf, str, size);
1108
1109         /* Add a newline if necessary */
1110         if (entry->buf[size - 1] != '\n') {
1111                 entry->buf[size] = '\n';
1112                 entry->buf[size + 1] = '\0';
1113         } else
1114                 entry->buf[size] = '\0';
1115
1116         __buffer_unlock_commit(buffer, event);
1117         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1118  out:
1119         ring_buffer_nest_end(buffer);
1120         return size;
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_array_puts);
1123
1124 /**
1125  * __trace_puts - write a constant string into the trace buffer.
1126  * @ip:    The address of the caller
1127  * @str:   The constant string to write
1128  * @size:  The size of the string.
1129  */
1130 int __trace_puts(unsigned long ip, const char *str, int size)
1131 {
1132         return __trace_array_puts(printk_trace, ip, str, size);
1133 }
1134 EXPORT_SYMBOL_GPL(__trace_puts);
1135
1136 /**
1137  * __trace_bputs - write the pointer to a constant string into trace buffer
1138  * @ip:    The address of the caller
1139  * @str:   The constant string to write to the buffer
1140  */
1141 int __trace_bputs(unsigned long ip, const char *str)
1142 {
1143         struct trace_array *tr = READ_ONCE(printk_trace);
1144         struct ring_buffer_event *event;
1145         struct trace_buffer *buffer;
1146         struct bputs_entry *entry;
1147         unsigned int trace_ctx;
1148         int size = sizeof(struct bputs_entry);
1149         int ret = 0;
1150
1151         if (!printk_binsafe(tr))
1152                 return __trace_puts(ip, str, strlen(str));
1153
1154         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1155                 return 0;
1156
1157         if (unlikely(tracing_selftest_running || tracing_disabled))
1158                 return 0;
1159
1160         trace_ctx = tracing_gen_ctx();
1161         buffer = tr->array_buffer.buffer;
1162
1163         ring_buffer_nest_start(buffer);
1164         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1165                                             trace_ctx);
1166         if (!event)
1167                 goto out;
1168
1169         entry = ring_buffer_event_data(event);
1170         entry->ip                       = ip;
1171         entry->str                      = str;
1172
1173         __buffer_unlock_commit(buffer, event);
1174         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175
1176         ret = 1;
1177  out:
1178         ring_buffer_nest_end(buffer);
1179         return ret;
1180 }
1181 EXPORT_SYMBOL_GPL(__trace_bputs);
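/*
 * Callers normally reach the two helpers above through the trace_puts()
 * macro, e.g.:
 *
 *      trace_puts("reached the slow path\n");
 *
 * which (as of this writing) expands to __trace_bputs() when the
 * argument is a true string constant and to __trace_puts() otherwise.
 */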
1182
1183 #ifdef CONFIG_TRACER_SNAPSHOT
1184 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1185                                            void *cond_data)
1186 {
1187         struct tracer *tracer = tr->current_trace;
1188         unsigned long flags;
1189
1190         if (in_nmi()) {
1191                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1192                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1193                 return;
1194         }
1195
1196         if (!tr->allocated_snapshot) {
1197                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1198                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1199                 tracer_tracing_off(tr);
1200                 return;
1201         }
1202
1203         /* Note, snapshot can not be used when the tracer uses it */
1204         if (tracer->use_max_tr) {
1205                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1206                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1207                 return;
1208         }
1209
1210         if (tr->mapped) {
1211                 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1212                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213                 return;
1214         }
1215
1216         local_irq_save(flags);
1217         update_max_tr(tr, current, smp_processor_id(), cond_data);
1218         local_irq_restore(flags);
1219 }
1220
1221 void tracing_snapshot_instance(struct trace_array *tr)
1222 {
1223         tracing_snapshot_instance_cond(tr, NULL);
1224 }
1225
1226 /**
1227  * tracing_snapshot - take a snapshot of the current buffer.
1228  *
1229  * This causes a swap between the snapshot buffer and the current live
1230  * tracing buffer. You can use this to take snapshots of the live
1231  * trace when some condition is triggered, but continue to trace.
1232  *
1233  * Note, make sure to allocate the snapshot either with
1234  * tracing_snapshot_alloc() or manually with:
1235  * echo 1 > /sys/kernel/tracing/snapshot
1236  *
1237  * If the snapshot buffer is not allocated, this will stop tracing,
1238  * basically making a permanent snapshot.
1239  */
1240 void tracing_snapshot(void)
1241 {
1242         struct trace_array *tr = &global_trace;
1243
1244         tracing_snapshot_instance(tr);
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot);
1247
1248 /**
1249  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1250  * @tr:         The tracing instance to snapshot
1251  * @cond_data:  The data to be tested conditionally, and possibly saved
1252  *
1253  * This is the same as tracing_snapshot() except that the snapshot is
1254  * conditional - the snapshot will only happen if the
1255  * cond_snapshot.update() implementation receiving the cond_data
1256  * returns true, which means that the trace array's cond_snapshot
1257  * update() operation used the cond_data to determine whether the
1258  * snapshot should be taken, and if it was, presumably saved it along
1259  * with the snapshot.
1260  */
1261 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1262 {
1263         tracing_snapshot_instance_cond(tr, cond_data);
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1266
1267 /**
1268  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1269  * @tr:         The tracing instance
1270  *
1271  * When the user enables a conditional snapshot using
1272  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1273  * with the snapshot.  This accessor is used to retrieve it.
1274  *
1275  * Should not be called from cond_snapshot.update(), since it takes
1276  * the tr->max_lock lock, which the code calling
1277  * cond_snapshot.update() has already done.
1278  *
1279  * Returns the cond_data associated with the trace array's snapshot.
1280  */
1281 void *tracing_cond_snapshot_data(struct trace_array *tr)
1282 {
1283         void *cond_data = NULL;
1284
1285         local_irq_disable();
1286         arch_spin_lock(&tr->max_lock);
1287
1288         if (tr->cond_snapshot)
1289                 cond_data = tr->cond_snapshot->cond_data;
1290
1291         arch_spin_unlock(&tr->max_lock);
1292         local_irq_enable();
1293
1294         return cond_data;
1295 }
1296 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1297
1298 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1299                                         struct array_buffer *size_buf, int cpu_id);
1300 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1301
1302 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1303 {
1304         int order;
1305         int ret;
1306
1307         if (!tr->allocated_snapshot) {
1308
1309                 /* Make the snapshot buffer have the same order as main buffer */
1310                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1311                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1312                 if (ret < 0)
1313                         return ret;
1314
1315                 /* allocate spare buffer */
1316                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1317                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1318                 if (ret < 0)
1319                         return ret;
1320
1321                 tr->allocated_snapshot = true;
1322         }
1323
1324         return 0;
1325 }
1326
1327 static void free_snapshot(struct trace_array *tr)
1328 {
1329         /*
1330          * We don't free the ring buffer; instead, we resize it because
1331          * the max_tr ring buffer has some state (e.g. ring->clock) that
1332          * we want to preserve.
1333          */
1334         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1335         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1336         set_buffer_entries(&tr->max_buffer, 1);
1337         tracing_reset_online_cpus(&tr->max_buffer);
1338         tr->allocated_snapshot = false;
1339 }
1340
1341 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1342 {
1343         int ret;
1344
1345         lockdep_assert_held(&trace_types_lock);
1346
1347         spin_lock(&tr->snapshot_trigger_lock);
1348         if (tr->snapshot == UINT_MAX || tr->mapped) {
1349                 spin_unlock(&tr->snapshot_trigger_lock);
1350                 return -EBUSY;
1351         }
1352
1353         tr->snapshot++;
1354         spin_unlock(&tr->snapshot_trigger_lock);
1355
1356         ret = tracing_alloc_snapshot_instance(tr);
1357         if (ret) {
1358                 spin_lock(&tr->snapshot_trigger_lock);
1359                 tr->snapshot--;
1360                 spin_unlock(&tr->snapshot_trigger_lock);
1361         }
1362
1363         return ret;
1364 }
1365
1366 int tracing_arm_snapshot(struct trace_array *tr)
1367 {
1368         int ret;
1369
1370         mutex_lock(&trace_types_lock);
1371         ret = tracing_arm_snapshot_locked(tr);
1372         mutex_unlock(&trace_types_lock);
1373
1374         return ret;
1375 }
1376
1377 void tracing_disarm_snapshot(struct trace_array *tr)
1378 {
1379         spin_lock(&tr->snapshot_trigger_lock);
1380         if (!WARN_ON(!tr->snapshot))
1381                 tr->snapshot--;
1382         spin_unlock(&tr->snapshot_trigger_lock);
1383 }
1384
1385 /**
1386  * tracing_alloc_snapshot - allocate snapshot buffer.
1387  *
1388  * This only allocates the snapshot buffer if it isn't already
1389  * allocated - it doesn't also take a snapshot.
1390  *
1391  * This is meant to be used in cases where the snapshot buffer needs
1392  * to be set up for events that can't sleep but need to be able to
1393  * trigger a snapshot.
1394  */
1395 int tracing_alloc_snapshot(void)
1396 {
1397         struct trace_array *tr = &global_trace;
1398         int ret;
1399
1400         ret = tracing_alloc_snapshot_instance(tr);
1401         WARN_ON(ret < 0);
1402
1403         return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1406
1407 /**
1408  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1409  *
1410  * This is similar to tracing_snapshot(), but it will allocate the
1411  * snapshot buffer if it isn't already allocated. Use this only
1412  * where it is safe to sleep, as the allocation may sleep.
1413  *
1414  * This causes a swap between the snapshot buffer and the current live
1415  * tracing buffer. You can use this to take snapshots of the live
1416  * trace when some condition is triggered, but continue to trace.
1417  */
1418 void tracing_snapshot_alloc(void)
1419 {
1420         int ret;
1421
1422         ret = tracing_alloc_snapshot();
1423         if (ret < 0)
1424                 return;
1425
1426         tracing_snapshot();
1427 }
1428 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
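/*
 * Illustrative sketch of the split described above: arm the snapshot
 * buffer from a context that may sleep, then take snapshots from a
 * context that cannot. The example_* names and the trigger condition
 * are hypothetical.
 */
static void example_snapshot_setup(void)
{
        tracing_alloc_snapshot();       /* may sleep; allocates the spare buffer */
}

static void example_snapshot_trigger(bool something_went_wrong)
{
        if (something_went_wrong)
                tracing_snapshot();     /* swaps the live buffer with the snapshot */
}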
1429
1430 /**
1431  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1432  * @tr:         The tracing instance
1433  * @cond_data:  User data to associate with the snapshot
1434  * @update:     Implementation of the cond_snapshot update function
1435  *
1436  * Check whether the conditional snapshot for the given instance has
1437  * already been enabled, or if the current tracer is already using a
1438  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1439  * save the cond_data and update function inside.
1440  *
1441  * Returns 0 if successful, error otherwise.
1442  */
1443 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1444                                  cond_update_fn_t update)
1445 {
1446         struct cond_snapshot *cond_snapshot;
1447         int ret = 0;
1448
1449         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1450         if (!cond_snapshot)
1451                 return -ENOMEM;
1452
1453         cond_snapshot->cond_data = cond_data;
1454         cond_snapshot->update = update;
1455
1456         mutex_lock(&trace_types_lock);
1457
1458         if (tr->current_trace->use_max_tr) {
1459                 ret = -EBUSY;
1460                 goto fail_unlock;
1461         }
1462
1463         /*
1464          * The cond_snapshot can only change to NULL without the
1465          * trace_types_lock. We don't care if we race with it going
1466          * to NULL, but we want to make sure that it's not set to
1467          * something other than NULL when we get here, which we can
1468          * do safely with only holding the trace_types_lock and not
1469          * having to take the max_lock.
1470          */
1471         if (tr->cond_snapshot) {
1472                 ret = -EBUSY;
1473                 goto fail_unlock;
1474         }
1475
1476         ret = tracing_arm_snapshot_locked(tr);
1477         if (ret)
1478                 goto fail_unlock;
1479
1480         local_irq_disable();
1481         arch_spin_lock(&tr->max_lock);
1482         tr->cond_snapshot = cond_snapshot;
1483         arch_spin_unlock(&tr->max_lock);
1484         local_irq_enable();
1485
1486         mutex_unlock(&trace_types_lock);
1487
1488         return ret;
1489
1490  fail_unlock:
1491         mutex_unlock(&trace_types_lock);
1492         kfree(cond_snapshot);
1493         return ret;
1494 }
1495 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
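/*
 * Illustrative sketch of a conditional snapshot user. The example_cond
 * structure, the threshold value and the example_* names are
 * hypothetical; the update() callback receives the cond_data passed to
 * tracing_snapshot_cond() and only a true return lets the snapshot
 * proceed.
 */
struct example_cond {
        u64     threshold;
};

static bool example_update(struct trace_array *tr, void *cond_data)
{
        struct example_cond *cond = cond_data;

        /* Take the snapshot only when the hypothetical condition holds. */
        return cond && cond->threshold > 1000;
}

/*
 * Typical usage:
 *      tracing_snapshot_cond_enable(tr, &cond, example_update);
 *      ...
 *      tracing_snapshot_cond(tr, &cond);
 *      ...
 *      tracing_snapshot_cond_disable(tr);
 */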
1496
1497 /**
1498  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1499  * @tr:         The tracing instance
1500  *
1501  * Check whether the conditional snapshot for the given instance is
1502  * enabled; if so, free the cond_snapshot associated with it,
1503  * otherwise return -EINVAL.
1504  *
1505  * Returns 0 if successful, error otherwise.
1506  */
1507 int tracing_snapshot_cond_disable(struct trace_array *tr)
1508 {
1509         int ret = 0;
1510
1511         local_irq_disable();
1512         arch_spin_lock(&tr->max_lock);
1513
1514         if (!tr->cond_snapshot)
1515                 ret = -EINVAL;
1516         else {
1517                 kfree(tr->cond_snapshot);
1518                 tr->cond_snapshot = NULL;
1519         }
1520
1521         arch_spin_unlock(&tr->max_lock);
1522         local_irq_enable();
1523
1524         tracing_disarm_snapshot(tr);
1525
1526         return ret;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1529 #else
1530 void tracing_snapshot(void)
1531 {
1532         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_snapshot);
1535 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1536 {
1537         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1538 }
1539 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1540 int tracing_alloc_snapshot(void)
1541 {
1542         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1543         return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1546 void tracing_snapshot_alloc(void)
1547 {
1548         /* Give warning */
1549         tracing_snapshot();
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1552 void *tracing_cond_snapshot_data(struct trace_array *tr)
1553 {
1554         return NULL;
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1557 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1558 {
1559         return -ENODEV;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1562 int tracing_snapshot_cond_disable(struct trace_array *tr)
1563 {
1564         return false;
1565 }
1566 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1567 #define free_snapshot(tr)       do { } while (0)
1568 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1569 #endif /* CONFIG_TRACER_SNAPSHOT */
1570
1571 void tracer_tracing_off(struct trace_array *tr)
1572 {
1573         if (tr->array_buffer.buffer)
1574                 ring_buffer_record_off(tr->array_buffer.buffer);
1575         /*
1576          * This flag is looked at when buffers haven't been allocated
1577          * yet, or by some tracers (like irqsoff), that just want to
1578          * know if the ring buffer has been disabled, but it can handle
1579          * races where it gets disabled but we still do a record.
1580          * As the check is in the fast path of the tracers, it is more
1581          * important to be fast than accurate.
1582          */
1583         tr->buffer_disabled = 1;
1584         /* Make the flag seen by readers */
1585         smp_wmb();
1586 }
1587
1588 /**
1589  * tracing_off - turn off tracing buffers
1590  *
1591  * This function stops the tracing buffers from recording data.
1592  * It does not disable any overhead the tracers themselves may
1593  * be causing. This function simply causes all recording to
1594  * the ring buffers to fail.
1595  */
1596 void tracing_off(void)
1597 {
1598         tracer_tracing_off(&global_trace);
1599 }
1600 EXPORT_SYMBOL_GPL(tracing_off);
1601
1602 void disable_trace_on_warning(void)
1603 {
1604         if (__disable_trace_on_warning) {
1605                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1606                         "Disabling tracing due to warning\n");
1607                 tracing_off();
1608         }
1609 }
1610
1611 /**
1612  * tracer_tracing_is_on - show the real state of the ring buffer
1613  * @tr : the trace array whose ring buffer state is queried
1614  *
1615  * Shows the real state of the ring buffer: whether it is enabled or not.
1616  */
1617 bool tracer_tracing_is_on(struct trace_array *tr)
1618 {
1619         if (tr->array_buffer.buffer)
1620                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1621         return !tr->buffer_disabled;
1622 }
1623
1624 /**
1625  * tracing_is_on - show state of ring buffers enabled
1626  */
1627 int tracing_is_on(void)
1628 {
1629         return tracer_tracing_is_on(&global_trace);
1630 }
1631 EXPORT_SYMBOL_GPL(tracing_is_on);
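/*
 * Illustrative sketch: freeze the buffers right after a suspect code
 * path so the interesting events stay at the tail of the trace. The
 * function name and the traced work are hypothetical.
 */
static void example_freeze_after(void)
{
        if (!tracing_is_on())
                return;                 /* already frozen by an earlier hit */

        /* ... code path under investigation runs here ... */

        tracing_off();                  /* stop recording; read the trace afterwards */
}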
1632
1633 static int __init set_buf_size(char *str)
1634 {
1635         unsigned long buf_size;
1636
1637         if (!str)
1638                 return 0;
1639         buf_size = memparse(str, &str);
1640         /*
1641          * nr_entries can not be zero and the startup
1642          * tests require some buffer space. Therefore
1643          * ensure we have at least 4096 bytes of buffer.
1644          */
1645         trace_buf_size = max(4096UL, buf_size);
1646         return 1;
1647 }
1648 __setup("trace_buf_size=", set_buf_size);
1649
1650 static int __init set_tracing_thresh(char *str)
1651 {
1652         unsigned long threshold;
1653         int ret;
1654
1655         if (!str)
1656                 return 0;
1657         ret = kstrtoul(str, 0, &threshold);
1658         if (ret < 0)
1659                 return 0;
1660         tracing_thresh = threshold * 1000;
1661         return 1;
1662 }
1663 __setup("tracing_thresh=", set_tracing_thresh);
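
/*
 * "tracing_thresh=" is given in microseconds on the command line and is
 * stored in nanoseconds here (hence the multiply by 1000). For example
 * (illustrative), booting with:
 *
 *	tracing_thresh=100
 *
 * makes the latency tracers record only traces that exceed 100 microseconds.
 */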
1664
1665 unsigned long nsecs_to_usecs(unsigned long nsecs)
1666 {
1667         return nsecs / 1000;
1668 }
1669
1670 /*
1671  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1672  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1673  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1674  * of strings in the order that the evals (enum) were defined.
1675  */
1676 #undef C
1677 #define C(a, b) b
1678
1679 /* These must match the bit positions in trace_iterator_flags */
1680 static const char *trace_options[] = {
1681         TRACE_FLAGS
1682         NULL
1683 };
1684
1685 static struct {
1686         u64 (*func)(void);
1687         const char *name;
1688         int in_ns;              /* is this clock in nanoseconds? */
1689 } trace_clocks[] = {
1690         { trace_clock_local,            "local",        1 },
1691         { trace_clock_global,           "global",       1 },
1692         { trace_clock_counter,          "counter",      0 },
1693         { trace_clock_jiffies,          "uptime",       0 },
1694         { trace_clock,                  "perf",         1 },
1695         { ktime_get_mono_fast_ns,       "mono",         1 },
1696         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1697         { ktime_get_boot_fast_ns,       "boot",         1 },
1698         { ktime_get_tai_fast_ns,        "tai",          1 },
1699         ARCH_TRACE_CLOCKS
1700 };
1701
1702 bool trace_clock_in_ns(struct trace_array *tr)
1703 {
1704         if (trace_clocks[tr->clock_id].in_ns)
1705                 return true;
1706
1707         return false;
1708 }
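
/*
 * The clocks above (plus any ARCH_TRACE_CLOCKS) are what user space sees
 * in the "trace_clock" tracefs file, with the current selection shown in
 * brackets. For example (illustrative shell session):
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot tai
 *	# echo mono > /sys/kernel/tracing/trace_clock
 */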
1709
1710 /*
1711  * trace_parser_get_init - gets the buffer for trace parser
1712  */
1713 int trace_parser_get_init(struct trace_parser *parser, int size)
1714 {
1715         memset(parser, 0, sizeof(*parser));
1716
1717         parser->buffer = kmalloc(size, GFP_KERNEL);
1718         if (!parser->buffer)
1719                 return 1;
1720
1721         parser->size = size;
1722         return 0;
1723 }
1724
1725 /*
1726  * trace_parser_put - frees the buffer for trace parser
1727  */
1728 void trace_parser_put(struct trace_parser *parser)
1729 {
1730         kfree(parser->buffer);
1731         parser->buffer = NULL;
1732 }
1733
1734 /*
1735  * trace_get_user - reads the user input string separated by space
1736  * (matched by isspace(ch))
1737  *
1738  * For each string found the 'struct trace_parser' is updated,
1739  * and the function returns.
1740  *
1741  * Returns number of bytes read.
1742  *
1743  * See kernel/trace/trace.h for 'struct trace_parser' details.
1744  */
1745 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1746         size_t cnt, loff_t *ppos)
1747 {
1748         char ch;
1749         size_t read = 0;
1750         ssize_t ret;
1751
1752         if (!*ppos)
1753                 trace_parser_clear(parser);
1754
1755         ret = get_user(ch, ubuf++);
1756         if (ret)
1757                 goto out;
1758
1759         read++;
1760         cnt--;
1761
1762         /*
1763          * The parser is not finished with the last write,
1764          * continue reading the user input without skipping spaces.
1765          */
1766         if (!parser->cont) {
1767                 /* skip white space */
1768                 while (cnt && isspace(ch)) {
1769                         ret = get_user(ch, ubuf++);
1770                         if (ret)
1771                                 goto out;
1772                         read++;
1773                         cnt--;
1774                 }
1775
1776                 parser->idx = 0;
1777
1778                 /* only spaces were written */
1779                 if (isspace(ch) || !ch) {
1780                         *ppos += read;
1781                         ret = read;
1782                         goto out;
1783                 }
1784         }
1785
1786         /* read the non-space input */
1787         while (cnt && !isspace(ch) && ch) {
1788                 if (parser->idx < parser->size - 1)
1789                         parser->buffer[parser->idx++] = ch;
1790                 else {
1791                         ret = -EINVAL;
1792                         goto out;
1793                 }
1794                 ret = get_user(ch, ubuf++);
1795                 if (ret)
1796                         goto out;
1797                 read++;
1798                 cnt--;
1799         }
1800
1801         /* We either got finished input or we have to wait for another call. */
1802         if (isspace(ch) || !ch) {
1803                 parser->buffer[parser->idx] = 0;
1804                 parser->cont = false;
1805         } else if (parser->idx < parser->size - 1) {
1806                 parser->cont = true;
1807                 parser->buffer[parser->idx++] = ch;
1808                 /* Make sure the parsed string always terminates with '\0'. */
1809                 parser->buffer[parser->idx] = 0;
1810         } else {
1811                 ret = -EINVAL;
1812                 goto out;
1813         }
1814
1815         *ppos += read;
1816         ret = read;
1817
1818 out:
1819         return ret;
1820 }
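
/*
 * Illustrative sketch (not used anywhere in this file; the function name is
 * hypothetical): how a tracefs write handler typically drives the parser
 * above, a simplified version of the pattern used by callers such as
 * ftrace_regex_write().
 */
static ssize_t __maybe_unused
example_parser_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t ret;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret >= 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
		pr_debug("parsed token: %s\n", parser.buffer);
	}

	trace_parser_put(&parser);
	return ret;
}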
1821
1822 /* TODO add a seq_buf_to_buffer() */
1823 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1824 {
1825         int len;
1826
1827         if (trace_seq_used(s) <= s->readpos)
1828                 return -EBUSY;
1829
1830         len = trace_seq_used(s) - s->readpos;
1831         if (cnt > len)
1832                 cnt = len;
1833         memcpy(buf, s->buffer + s->readpos, cnt);
1834
1835         s->readpos += cnt;
1836         return cnt;
1837 }
1838
1839 unsigned long __read_mostly     tracing_thresh;
1840
1841 #ifdef CONFIG_TRACER_MAX_TRACE
1842 static const struct file_operations tracing_max_lat_fops;
1843
1844 #ifdef LATENCY_FS_NOTIFY
1845
1846 static struct workqueue_struct *fsnotify_wq;
1847
1848 static void latency_fsnotify_workfn(struct work_struct *work)
1849 {
1850         struct trace_array *tr = container_of(work, struct trace_array,
1851                                               fsnotify_work);
1852         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1853 }
1854
1855 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1856 {
1857         struct trace_array *tr = container_of(iwork, struct trace_array,
1858                                               fsnotify_irqwork);
1859         queue_work(fsnotify_wq, &tr->fsnotify_work);
1860 }
1861
1862 static void trace_create_maxlat_file(struct trace_array *tr,
1863                                      struct dentry *d_tracer)
1864 {
1865         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1866         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1867         tr->d_max_latency = trace_create_file("tracing_max_latency",
1868                                               TRACE_MODE_WRITE,
1869                                               d_tracer, tr,
1870                                               &tracing_max_lat_fops);
1871 }
1872
1873 __init static int latency_fsnotify_init(void)
1874 {
1875         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1876                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1877         if (!fsnotify_wq) {
1878                 pr_err("Unable to allocate tr_max_lat_wq\n");
1879                 return -ENOMEM;
1880         }
1881         return 0;
1882 }
1883
1884 late_initcall_sync(latency_fsnotify_init);
1885
1886 void latency_fsnotify(struct trace_array *tr)
1887 {
1888         if (!fsnotify_wq)
1889                 return;
1890         /*
1891          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1892          * possible that we are called from __schedule() or do_idle(), which
1893          * could cause a deadlock.
1894          */
1895         irq_work_queue(&tr->fsnotify_irqwork);
1896 }
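
/*
 * To summarize the chain above: a latency tracer that records a new max
 * calls latency_fsnotify(), possibly from scheduler or idle context, which
 * only queues irq_work; the irq_work handler punts to the workqueue, and
 * the work item finally calls fsnotify_inode() so that user space watching
 * "tracing_max_latency" with inotify sees the update.
 */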
1897
1898 #else /* !LATENCY_FS_NOTIFY */
1899
1900 #define trace_create_maxlat_file(tr, d_tracer)                          \
1901         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1902                           d_tracer, tr, &tracing_max_lat_fops)
1903
1904 #endif
1905
1906 /*
1907  * Copy the new maximum trace into the separate maximum-trace
1908  * structure. (this way the maximum trace is permanently saved,
1909  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1910  */
1911 static void
1912 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1913 {
1914         struct array_buffer *trace_buf = &tr->array_buffer;
1915         struct array_buffer *max_buf = &tr->max_buffer;
1916         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1917         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1918
1919         max_buf->cpu = cpu;
1920         max_buf->time_start = data->preempt_timestamp;
1921
1922         max_data->saved_latency = tr->max_latency;
1923         max_data->critical_start = data->critical_start;
1924         max_data->critical_end = data->critical_end;
1925
1926         strscpy(max_data->comm, tsk->comm);
1927         max_data->pid = tsk->pid;
1928         /*
1929          * If tsk == current, then use current_uid(), as that does not use
1930          * RCU. The irq tracer can be called out of RCU scope.
1931          */
1932         if (tsk == current)
1933                 max_data->uid = current_uid();
1934         else
1935                 max_data->uid = task_uid(tsk);
1936
1937         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1938         max_data->policy = tsk->policy;
1939         max_data->rt_priority = tsk->rt_priority;
1940
1941         /* record this task's comm */
1942         tracing_record_cmdline(tsk);
1943         latency_fsnotify(tr);
1944 }
1945
1946 /**
1947  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1948  * @tr: tracer
1949  * @tsk: the task with the latency
1950  * @cpu: The cpu that initiated the trace.
1951  * @cond_data: User data associated with a conditional snapshot
1952  *
1953  * Flip the buffers between the @tr and the max_tr and record information
1954  * about which task was the cause of this latency.
1955  */
1956 void
1957 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1958               void *cond_data)
1959 {
1960         if (tr->stop_count)
1961                 return;
1962
1963         WARN_ON_ONCE(!irqs_disabled());
1964
1965         if (!tr->allocated_snapshot) {
1966                 /* Only the nop tracer should hit this when disabling */
1967                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1968                 return;
1969         }
1970
1971         arch_spin_lock(&tr->max_lock);
1972
1973         /* Inherit the recordable setting from array_buffer */
1974         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1975                 ring_buffer_record_on(tr->max_buffer.buffer);
1976         else
1977                 ring_buffer_record_off(tr->max_buffer.buffer);
1978
1979 #ifdef CONFIG_TRACER_SNAPSHOT
1980         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1981                 arch_spin_unlock(&tr->max_lock);
1982                 return;
1983         }
1984 #endif
1985         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1986
1987         __update_max_tr(tr, tsk, cpu);
1988
1989         arch_spin_unlock(&tr->max_lock);
1990
1991         /* Any waiters on the old snapshot buffer need to wake up */
1992         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1993 }
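
/*
 * Illustrative call site (the snippet is hypothetical, but the latency
 * tracers such as wakeup and irqsoff follow this shape): once a tracer
 * measures a new worst-case latency it saves it away with
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * called with interrupts disabled, which is what the irqs_disabled()
 * warning above expects.
 */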
1994
1995 /**
1996  * update_max_tr_single - only copy one trace over, and reset the rest
1997  * @tr: tracer
1998  * @tsk: task with the latency
1999  * @cpu: the cpu of the buffer to copy.
2000  *
2001  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2002  */
2003 void
2004 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2005 {
2006         int ret;
2007
2008         if (tr->stop_count)
2009                 return;
2010
2011         WARN_ON_ONCE(!irqs_disabled());
2012         if (!tr->allocated_snapshot) {
2013                 /* Only the nop tracer should hit this when disabling */
2014                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2015                 return;
2016         }
2017
2018         arch_spin_lock(&tr->max_lock);
2019
2020         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2021
2022         if (ret == -EBUSY) {
2023                 /*
2024                  * We failed to swap the buffer due to a commit taking
2025                  * place on this CPU. We fail to record, but we reset
2026                  * the max trace buffer (no one writes directly to it)
2027                  * and flag that it failed.
2028                  * Another reason could be that a resize is in progress.
2029                  */
2030                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2031                         "Failed to swap buffers due to commit or resize in progress\n");
2032         }
2033
2034         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2035
2036         __update_max_tr(tr, tsk, cpu);
2037         arch_spin_unlock(&tr->max_lock);
2038 }
2039
2040 #endif /* CONFIG_TRACER_MAX_TRACE */
2041
2042 struct pipe_wait {
2043         struct trace_iterator           *iter;
2044         int                             wait_index;
2045 };
2046
2047 static bool wait_pipe_cond(void *data)
2048 {
2049         struct pipe_wait *pwait = data;
2050         struct trace_iterator *iter = pwait->iter;
2051
2052         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2053                 return true;
2054
2055         return iter->closed;
2056 }
2057
2058 static int wait_on_pipe(struct trace_iterator *iter, int full)
2059 {
2060         struct pipe_wait pwait;
2061         int ret;
2062
2063         /* Iterators are static; they should be filled or empty */
2064         if (trace_buffer_iter(iter, iter->cpu_file))
2065                 return 0;
2066
2067         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2068         pwait.iter = iter;
2069
2070         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2071                                wait_pipe_cond, &pwait);
2072
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074         /*
2075          * Make sure this is still the snapshot buffer, as if a snapshot were
2076          * to happen, this would now be the main buffer.
2077          */
2078         if (iter->snapshot)
2079                 iter->array_buffer = &iter->tr->max_buffer;
2080 #endif
2081         return ret;
2082 }
2083
2084 #ifdef CONFIG_FTRACE_STARTUP_TEST
2085 static bool selftests_can_run;
2086
2087 struct trace_selftests {
2088         struct list_head                list;
2089         struct tracer                   *type;
2090 };
2091
2092 static LIST_HEAD(postponed_selftests);
2093
2094 static int save_selftest(struct tracer *type)
2095 {
2096         struct trace_selftests *selftest;
2097
2098         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2099         if (!selftest)
2100                 return -ENOMEM;
2101
2102         selftest->type = type;
2103         list_add(&selftest->list, &postponed_selftests);
2104         return 0;
2105 }
2106
2107 static int run_tracer_selftest(struct tracer *type)
2108 {
2109         struct trace_array *tr = &global_trace;
2110         struct tracer *saved_tracer = tr->current_trace;
2111         int ret;
2112
2113         if (!type->selftest || tracing_selftest_disabled)
2114                 return 0;
2115
2116         /*
2117          * If a tracer registers early in boot up (before scheduling is
2118          * initialized and such), then do not run its selftests yet.
2119          * Instead, run it a little later in the boot process.
2120          */
2121         if (!selftests_can_run)
2122                 return save_selftest(type);
2123
2124         if (!tracing_is_on()) {
2125                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2126                         type->name);
2127                 return 0;
2128         }
2129
2130         /*
2131          * Run a selftest on this tracer.
2132          * Here we reset the trace buffer, and set the current
2133          * tracer to be this tracer. The tracer can then run some
2134          * internal tracing to verify that everything is in order.
2135          * If we fail, we do not register this tracer.
2136          */
2137         tracing_reset_online_cpus(&tr->array_buffer);
2138
2139         tr->current_trace = type;
2140
2141 #ifdef CONFIG_TRACER_MAX_TRACE
2142         if (type->use_max_tr) {
2143                 /* If we expanded the buffers, make sure the max is expanded too */
2144                 if (tr->ring_buffer_expanded)
2145                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2146                                            RING_BUFFER_ALL_CPUS);
2147                 tr->allocated_snapshot = true;
2148         }
2149 #endif
2150
2151         /* the test is responsible for initializing and enabling */
2152         pr_info("Testing tracer %s: ", type->name);
2153         ret = type->selftest(type, tr);
2154         /* the test is responsible for resetting too */
2155         tr->current_trace = saved_tracer;
2156         if (ret) {
2157                 printk(KERN_CONT "FAILED!\n");
2158                 /* Add the warning after printing 'FAILED' */
2159                 WARN_ON(1);
2160                 return -1;
2161         }
2162         /* Only reset on passing, to avoid touching corrupted buffers */
2163         tracing_reset_online_cpus(&tr->array_buffer);
2164
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166         if (type->use_max_tr) {
2167                 tr->allocated_snapshot = false;
2168
2169                 /* Shrink the max buffer again */
2170                 if (tr->ring_buffer_expanded)
2171                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2172                                            RING_BUFFER_ALL_CPUS);
2173         }
2174 #endif
2175
2176         printk(KERN_CONT "PASSED\n");
2177         return 0;
2178 }
2179
2180 static int do_run_tracer_selftest(struct tracer *type)
2181 {
2182         int ret;
2183
2184         /*
2185          * Tests can take a long time, especially if they are run one after the
2186          * other, as does happen during bootup when all the tracers are
2187          * registered. This could cause the soft lockup watchdog to trigger.
2188          */
2189         cond_resched();
2190
2191         tracing_selftest_running = true;
2192         ret = run_tracer_selftest(type);
2193         tracing_selftest_running = false;
2194
2195         return ret;
2196 }
2197
2198 static __init int init_trace_selftests(void)
2199 {
2200         struct trace_selftests *p, *n;
2201         struct tracer *t, **last;
2202         int ret;
2203
2204         selftests_can_run = true;
2205
2206         mutex_lock(&trace_types_lock);
2207
2208         if (list_empty(&postponed_selftests))
2209                 goto out;
2210
2211         pr_info("Running postponed tracer tests:\n");
2212
2213         tracing_selftest_running = true;
2214         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2215                 /* This loop can take minutes when sanitizers are enabled, so
2216                  * let's make sure we allow RCU processing.
2217                  */
2218                 cond_resched();
2219                 ret = run_tracer_selftest(p->type);
2220                 /* If the test fails, then warn and remove from available_tracers */
2221                 if (ret < 0) {
2222                         WARN(1, "tracer: %s failed selftest, disabling\n",
2223                              p->type->name);
2224                         last = &trace_types;
2225                         for (t = trace_types; t; t = t->next) {
2226                                 if (t == p->type) {
2227                                         *last = t->next;
2228                                         break;
2229                                 }
2230                                 last = &t->next;
2231                         }
2232                 }
2233                 list_del(&p->list);
2234                 kfree(p);
2235         }
2236         tracing_selftest_running = false;
2237
2238  out:
2239         mutex_unlock(&trace_types_lock);
2240
2241         return 0;
2242 }
2243 core_initcall(init_trace_selftests);
2244 #else
2245 static inline int do_run_tracer_selftest(struct tracer *type)
2246 {
2247         return 0;
2248 }
2249 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2250
2251 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2252
2253 static void __init apply_trace_boot_options(void);
2254
2255 /**
2256  * register_tracer - register a tracer with the ftrace system.
2257  * @type: the plugin for the tracer
2258  *
2259  * Register a new plugin tracer.
2260  */
2261 int __init register_tracer(struct tracer *type)
2262 {
2263         struct tracer *t;
2264         int ret = 0;
2265
2266         if (!type->name) {
2267                 pr_info("Tracer must have a name\n");
2268                 return -1;
2269         }
2270
2271         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2272                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2273                 return -1;
2274         }
2275
2276         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2277                 pr_warn("Can not register tracer %s due to lockdown\n",
2278                            type->name);
2279                 return -EPERM;
2280         }
2281
2282         mutex_lock(&trace_types_lock);
2283
2284         for (t = trace_types; t; t = t->next) {
2285                 if (strcmp(type->name, t->name) == 0) {
2286                         /* already found */
2287                         pr_info("Tracer %s already registered\n",
2288                                 type->name);
2289                         ret = -1;
2290                         goto out;
2291                 }
2292         }
2293
2294         if (!type->set_flag)
2295                 type->set_flag = &dummy_set_flag;
2296         if (!type->flags) {
2297                 /* allocate a dummy tracer_flags */
2298                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2299                 if (!type->flags) {
2300                         ret = -ENOMEM;
2301                         goto out;
2302                 }
2303                 type->flags->val = 0;
2304                 type->flags->opts = dummy_tracer_opt;
2305         } else
2306                 if (!type->flags->opts)
2307                         type->flags->opts = dummy_tracer_opt;
2308
2309         /* store the tracer for __set_tracer_option */
2310         type->flags->trace = type;
2311
2312         ret = do_run_tracer_selftest(type);
2313         if (ret < 0)
2314                 goto out;
2315
2316         type->next = trace_types;
2317         trace_types = type;
2318         add_tracer_options(&global_trace, type);
2319
2320  out:
2321         mutex_unlock(&trace_types_lock);
2322
2323         if (ret || !default_bootup_tracer)
2324                 goto out_unlock;
2325
2326         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2327                 goto out_unlock;
2328
2329         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2330         /* Do we want this tracer to start on bootup? */
2331         tracing_set_tracer(&global_trace, type->name);
2332         default_bootup_tracer = NULL;
2333
2334         apply_trace_boot_options();
2335
2336         /* disable other selftests, since running a tracer will break them. */
2337         disable_tracing_selftest("running a tracer");
2338
2339  out_unlock:
2340         return ret;
2341 }
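
/*
 * Illustrative sketch (all "example_" names are hypothetical): a built-in
 * tracer is registered from an initcall, much like the function tracer
 * does, e.g.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * register_tracer() is __init, so this can only happen during boot.
 */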
2342
2343 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2344 {
2345         struct trace_buffer *buffer = buf->buffer;
2346
2347         if (!buffer)
2348                 return;
2349
2350         ring_buffer_record_disable(buffer);
2351
2352         /* Make sure all commits have finished */
2353         synchronize_rcu();
2354         ring_buffer_reset_cpu(buffer, cpu);
2355
2356         ring_buffer_record_enable(buffer);
2357 }
2358
2359 void tracing_reset_online_cpus(struct array_buffer *buf)
2360 {
2361         struct trace_buffer *buffer = buf->buffer;
2362
2363         if (!buffer)
2364                 return;
2365
2366         ring_buffer_record_disable(buffer);
2367
2368         /* Make sure all commits have finished */
2369         synchronize_rcu();
2370
2371         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2372
2373         ring_buffer_reset_online_cpus(buffer);
2374
2375         ring_buffer_record_enable(buffer);
2376 }
2377
2378 static void tracing_reset_all_cpus(struct array_buffer *buf)
2379 {
2380         struct trace_buffer *buffer = buf->buffer;
2381
2382         if (!buffer)
2383                 return;
2384
2385         ring_buffer_record_disable(buffer);
2386
2387         /* Make sure all commits have finished */
2388         synchronize_rcu();
2389
2390         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2391
2392         ring_buffer_reset(buffer);
2393
2394         ring_buffer_record_enable(buffer);
2395 }
2396
2397 /* Must have trace_types_lock held */
2398 void tracing_reset_all_online_cpus_unlocked(void)
2399 {
2400         struct trace_array *tr;
2401
2402         lockdep_assert_held(&trace_types_lock);
2403
2404         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2405                 if (!tr->clear_trace)
2406                         continue;
2407                 tr->clear_trace = false;
2408                 tracing_reset_online_cpus(&tr->array_buffer);
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410                 tracing_reset_online_cpus(&tr->max_buffer);
2411 #endif
2412         }
2413 }
2414
2415 void tracing_reset_all_online_cpus(void)
2416 {
2417         mutex_lock(&trace_types_lock);
2418         tracing_reset_all_online_cpus_unlocked();
2419         mutex_unlock(&trace_types_lock);
2420 }
2421
2422 int is_tracing_stopped(void)
2423 {
2424         return global_trace.stop_count;
2425 }
2426
2427 static void tracing_start_tr(struct trace_array *tr)
2428 {
2429         struct trace_buffer *buffer;
2430         unsigned long flags;
2431
2432         if (tracing_disabled)
2433                 return;
2434
2435         raw_spin_lock_irqsave(&tr->start_lock, flags);
2436         if (--tr->stop_count) {
2437                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2438                         /* Someone screwed up their debugging */
2439                         tr->stop_count = 0;
2440                 }
2441                 goto out;
2442         }
2443
2444         /* Prevent the buffers from switching */
2445         arch_spin_lock(&tr->max_lock);
2446
2447         buffer = tr->array_buffer.buffer;
2448         if (buffer)
2449                 ring_buffer_record_enable(buffer);
2450
2451 #ifdef CONFIG_TRACER_MAX_TRACE
2452         buffer = tr->max_buffer.buffer;
2453         if (buffer)
2454                 ring_buffer_record_enable(buffer);
2455 #endif
2456
2457         arch_spin_unlock(&tr->max_lock);
2458
2459  out:
2460         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2461 }
2462
2463 /**
2464  * tracing_start - quick start of the tracer
2465  *
2466  * If tracing is enabled but was stopped by tracing_stop,
2467  * this will start the tracer back up.
2468  */
2469 void tracing_start(void)
2471 {
2472         return tracing_start_tr(&global_trace);
2473 }
2474
2475 static void tracing_stop_tr(struct trace_array *tr)
2476 {
2477         struct trace_buffer *buffer;
2478         unsigned long flags;
2479
2480         raw_spin_lock_irqsave(&tr->start_lock, flags);
2481         if (tr->stop_count++)
2482                 goto out;
2483
2484         /* Prevent the buffers from switching */
2485         arch_spin_lock(&tr->max_lock);
2486
2487         buffer = tr->array_buffer.buffer;
2488         if (buffer)
2489                 ring_buffer_record_disable(buffer);
2490
2491 #ifdef CONFIG_TRACER_MAX_TRACE
2492         buffer = tr->max_buffer.buffer;
2493         if (buffer)
2494                 ring_buffer_record_disable(buffer);
2495 #endif
2496
2497         arch_spin_unlock(&tr->max_lock);
2498
2499  out:
2500         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2501 }
2502
2503 /**
2504  * tracing_stop - quick stop of the tracer
2505  *
2506  * Light weight way to stop tracing. Use in conjunction with
2507  * tracing_start.
2508  */
2509 void tracing_stop(void)
2510 {
2511         return tracing_stop_tr(&global_trace);
2512 }
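
/*
 * Example pairing (illustrative): code that needs the buffers quiescent
 * while it inspects them can bracket the region with
 *
 *	tracing_stop();
 *	... inspect or copy out the buffer contents ...
 *	tracing_start();
 *
 * and the stop_count bookkeeping above lets such pairs nest.
 */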
2513
2514 /*
2515  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2516  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2517  * simplifies those functions and keeps them in sync.
2518  */
2519 enum print_line_t trace_handle_return(struct trace_seq *s)
2520 {
2521         return trace_seq_has_overflowed(s) ?
2522                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_handle_return);
2525
2526 static unsigned short migration_disable_value(void)
2527 {
2528 #if defined(CONFIG_SMP)
2529         return current->migration_disabled;
2530 #else
2531         return 0;
2532 #endif
2533 }
2534
2535 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2536 {
2537         unsigned int trace_flags = irqs_status;
2538         unsigned int pc;
2539
2540         pc = preempt_count();
2541
2542         if (pc & NMI_MASK)
2543                 trace_flags |= TRACE_FLAG_NMI;
2544         if (pc & HARDIRQ_MASK)
2545                 trace_flags |= TRACE_FLAG_HARDIRQ;
2546         if (in_serving_softirq())
2547                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2548         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2549                 trace_flags |= TRACE_FLAG_BH_OFF;
2550
2551         if (tif_need_resched())
2552                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2553         if (test_preempt_need_resched())
2554                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2555         if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2556                 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2557         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2558                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2559 }
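
/*
 * Layout of the returned trace_ctx word, as built above:
 *
 *	bits  0- 3: preempt_count(), capped at 0xf
 *	bits  4- 7: migration-disable depth, capped at 0xf
 *	bits 16+  : TRACE_FLAG_* bits (irq-off, NMI, hardirq, softirq and
 *		    resched state)
 */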
2560
2561 struct ring_buffer_event *
2562 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2563                           int type,
2564                           unsigned long len,
2565                           unsigned int trace_ctx)
2566 {
2567         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2568 }
2569
2570 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2571 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2572 static int trace_buffered_event_ref;
2573
2574 /**
2575  * trace_buffered_event_enable - enable buffering events
2576  *
2577  * When events are being filtered, it is quicker to use a temporary
2578  * buffer to write the event data into if there's a likely chance
2579  * that it will not be committed. The discard of the ring buffer
2580  * is not as fast as committing, and is much slower than copying
2581  * a commit.
2582  *
2583  * When an event is to be filtered, allocate per cpu buffers to
2584  * write the event data into, and if the event is filtered and discarded
2585  * it is simply dropped, otherwise, the entire data is to be committed
2586  * in one shot.
2587  */
2588 void trace_buffered_event_enable(void)
2589 {
2590         struct ring_buffer_event *event;
2591         struct page *page;
2592         int cpu;
2593
2594         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2595
2596         if (trace_buffered_event_ref++)
2597                 return;
2598
2599         for_each_tracing_cpu(cpu) {
2600                 page = alloc_pages_node(cpu_to_node(cpu),
2601                                         GFP_KERNEL | __GFP_NORETRY, 0);
2602                 /* This is just an optimization and can handle failures */
2603                 if (!page) {
2604                         pr_err("Failed to allocate event buffer\n");
2605                         break;
2606                 }
2607
2608                 event = page_address(page);
2609                 memset(event, 0, sizeof(*event));
2610
2611                 per_cpu(trace_buffered_event, cpu) = event;
2612
2613                 preempt_disable();
2614                 if (cpu == smp_processor_id() &&
2615                     __this_cpu_read(trace_buffered_event) !=
2616                     per_cpu(trace_buffered_event, cpu))
2617                         WARN_ON_ONCE(1);
2618                 preempt_enable();
2619         }
2620 }
2621
2622 static void enable_trace_buffered_event(void *data)
2623 {
2624         /* Probably not needed, but do it anyway */
2625         smp_rmb();
2626         this_cpu_dec(trace_buffered_event_cnt);
2627 }
2628
2629 static void disable_trace_buffered_event(void *data)
2630 {
2631         this_cpu_inc(trace_buffered_event_cnt);
2632 }
2633
2634 /**
2635  * trace_buffered_event_disable - disable buffering events
2636  *
2637  * When a filter is removed, it is faster to not use the buffered
2638  * events, and to commit directly into the ring buffer. Free up
2639  * the temp buffers when there are no more users. This requires
2640  * special synchronization with current events.
2641  */
2642 void trace_buffered_event_disable(void)
2643 {
2644         int cpu;
2645
2646         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647
2648         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2649                 return;
2650
2651         if (--trace_buffered_event_ref)
2652                 return;
2653
2654         /* For each CPU, set the buffer as used. */
2655         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2656                          NULL, true);
2657
2658         /* Wait for all current users to finish */
2659         synchronize_rcu();
2660
2661         for_each_tracing_cpu(cpu) {
2662                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2663                 per_cpu(trace_buffered_event, cpu) = NULL;
2664         }
2665
2666         /*
2667          * Wait for all CPUs that potentially started checking if they can use
2668          * their event buffer only after the previous synchronize_rcu() call and
2669          * they still read a valid pointer from trace_buffered_event. It must be
2670          * ensured they don't see cleared trace_buffered_event_cnt else they
2671          * could wrongly decide to use the pointed-to buffer which is now freed.
2672          */
2673         synchronize_rcu();
2674
2675         /* For each CPU, relinquish the buffer */
2676         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2677                          true);
2678 }
2679
2680 static struct trace_buffer *temp_buffer;
2681
2682 struct ring_buffer_event *
2683 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2684                           struct trace_event_file *trace_file,
2685                           int type, unsigned long len,
2686                           unsigned int trace_ctx)
2687 {
2688         struct ring_buffer_event *entry;
2689         struct trace_array *tr = trace_file->tr;
2690         int val;
2691
2692         *current_rb = tr->array_buffer.buffer;
2693
2694         if (!tr->no_filter_buffering_ref &&
2695             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2696                 preempt_disable_notrace();
2697                 /*
2698                  * Filtering is on, so try to use the per cpu buffer first.
2699                  * This buffer will simulate a ring_buffer_event,
2700                  * where the type_len is zero and the array[0] will
2701                  * hold the full length.
2702                  * (see include/linux/ring_buffer.h for details on
2703                  *  how the ring_buffer_event is structured).
2704                  *
2705                  * Using a temp buffer during filtering and copying it
2706                  * on a matched filter is quicker than writing directly
2707                  * into the ring buffer and then discarding it when
2708                  * it doesn't match. That is because the discard
2709                  * requires several atomic operations to get right.
2710                  * Copying on match and doing nothing on a failed match
2711                  * is still quicker than no copy on match, but having
2712                  * to discard out of the ring buffer on a failed match.
2713                  */
2714                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2715                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2716
2717                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2718
2719                         /*
2720                          * Preemption is disabled, but interrupts and NMIs
2721                          * can still come in now. If that happens after
2722                          * the above increment, then it will have to go
2723                          * back to the old method of allocating the event
2724                          * on the ring buffer, and if the filter fails, it
2725                          * will have to call ring_buffer_discard_commit()
2726                          * to remove it.
2727                          *
2728                          * Need to also check the unlikely case that the
2729                          * length is bigger than the temp buffer size.
2730                          * If that happens, then the reserve is pretty much
2731                          * guaranteed to fail, as the ring buffer currently
2732                          * only allows events less than a page. But that may
2733                          * change in the future, so let the ring buffer reserve
2734                          * handle the failure in that case.
2735                          */
2736                         if (val == 1 && likely(len <= max_len)) {
2737                                 trace_event_setup(entry, type, trace_ctx);
2738                                 entry->array[0] = len;
2739                                 /* Return with preemption disabled */
2740                                 return entry;
2741                         }
2742                         this_cpu_dec(trace_buffered_event_cnt);
2743                 }
2744                 /* __trace_buffer_lock_reserve() disables preemption */
2745                 preempt_enable_notrace();
2746         }
2747
2748         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2749                                             trace_ctx);
2750         /*
2751          * If tracing is off, but we have triggers enabled
2752          * we still need to look at the event data. Use the temp_buffer
2753          * to store the trace event for the trigger to use. It's recursion
2754          * safe and will not be recorded anywhere.
2755          */
2756         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757                 *current_rb = temp_buffer;
2758                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2759                                                     trace_ctx);
2760         }
2761         return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764
2765 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770         struct trace_event_call *event_call;
2771         struct trace_event_file *file;
2772         struct trace_event *event;
2773         unsigned long flags;
2774         struct trace_iterator *iter = tracepoint_print_iter;
2775
2776         /* We should never get here if iter is NULL */
2777         if (WARN_ON_ONCE(!iter))
2778                 return;
2779
2780         event_call = fbuffer->trace_file->event_call;
2781         if (!event_call || !event_call->event.funcs ||
2782             !event_call->event.funcs->trace)
2783                 return;
2784
2785         file = fbuffer->trace_file;
2786         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788              !filter_match_preds(file->filter, fbuffer->entry)))
2789                 return;
2790
2791         event = &fbuffer->trace_file->event_call->event;
2792
2793         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794         trace_seq_init(&iter->seq);
2795         iter->ent = fbuffer->entry;
2796         event_call->event.funcs->trace(iter, 0, event);
2797         trace_seq_putc(&iter->seq, 0);
2798         printk("%s", iter->seq.buffer);
2799
2800         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802
2803 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2804                              void *buffer, size_t *lenp,
2805                              loff_t *ppos)
2806 {
2807         int save_tracepoint_printk;
2808         int ret;
2809
2810         mutex_lock(&tracepoint_printk_mutex);
2811         save_tracepoint_printk = tracepoint_printk;
2812
2813         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814
2815         /*
2816          * This will force exiting early, as tracepoint_printk
2817          * is always zero when tracepoint_print_iter is not allocated
2818          */
2819         if (!tracepoint_print_iter)
2820                 tracepoint_printk = 0;
2821
2822         if (save_tracepoint_printk == tracepoint_printk)
2823                 goto out;
2824
2825         if (tracepoint_printk)
2826                 static_key_enable(&tracepoint_printk_key.key);
2827         else
2828                 static_key_disable(&tracepoint_printk_key.key);
2829
2830  out:
2831         mutex_unlock(&tracepoint_printk_mutex);
2832
2833         return ret;
2834 }
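
/*
 * This handler backs the kernel.tracepoint_printk sysctl; the same knob can
 * also be enabled at boot with the "tp_printk" parameter. While the static
 * key is enabled, trace_event_buffer_commit() below mirrors every event to
 * printk() via output_printk().
 */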
2835
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838         enum event_trigger_type tt = ETT_NONE;
2839         struct trace_event_file *file = fbuffer->trace_file;
2840
2841         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2842                         fbuffer->entry, &tt))
2843                 goto discard;
2844
2845         if (static_key_false(&tracepoint_printk_key.key))
2846                 output_printk(fbuffer);
2847
2848         if (static_branch_unlikely(&trace_event_exports_enabled))
2849                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2850
2851         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2852                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2853
2854 discard:
2855         if (tt)
2856                 event_triggers_post_call(file, tt);
2857
2858 }
2859 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2860
2861 /*
2862  * Skip 3:
2863  *
2864  *   trace_buffer_unlock_commit_regs()
2865  *   trace_event_buffer_commit()
2866  *   trace_event_raw_event_xxx()
2867  */
2868 # define STACK_SKIP 3
2869
2870 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2871                                      struct trace_buffer *buffer,
2872                                      struct ring_buffer_event *event,
2873                                      unsigned int trace_ctx,
2874                                      struct pt_regs *regs)
2875 {
2876         __buffer_unlock_commit(buffer, event);
2877
2878         /*
2879          * If regs is not set, then skip the necessary functions.
2880          * Note, we can still get here via blktrace, wakeup tracer
2881          * and mmiotrace, but that's ok if they lose a function or
2882          * two. They are not that meaningful.
2883          */
2884         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2885         ftrace_trace_userstack(tr, buffer, trace_ctx);
2886 }
2887
2888 /*
2889  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2890  */
2891 void
2892 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2893                                    struct ring_buffer_event *event)
2894 {
2895         __buffer_unlock_commit(buffer, event);
2896 }
2897
2898 void
2899 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2900                parent_ip, unsigned int trace_ctx)
2901 {
2902         struct trace_buffer *buffer = tr->array_buffer.buffer;
2903         struct ring_buffer_event *event;
2904         struct ftrace_entry *entry;
2905
2906         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2907                                             trace_ctx);
2908         if (!event)
2909                 return;
2910         entry   = ring_buffer_event_data(event);
2911         entry->ip                       = ip;
2912         entry->parent_ip                = parent_ip;
2913
2914         if (static_branch_unlikely(&trace_function_exports_enabled))
2915                 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916         __buffer_unlock_commit(buffer, event);
2917 }
2918
2919 #ifdef CONFIG_STACKTRACE
2920
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING   4
2923
2924 #define FTRACE_KSTACK_ENTRIES   (SZ_4K / FTRACE_KSTACK_NESTING)
2925
2926 struct ftrace_stack {
2927         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929
2930
2931 struct ftrace_stacks {
2932         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2933 };
2934
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937
2938 static void __ftrace_trace_stack(struct trace_array *tr,
2939                                  struct trace_buffer *buffer,
2940                                  unsigned int trace_ctx,
2941                                  int skip, struct pt_regs *regs)
2942 {
2943         struct ring_buffer_event *event;
2944         unsigned int size, nr_entries;
2945         struct ftrace_stack *fstack;
2946         struct stack_entry *entry;
2947         int stackidx;
2948
2949         /*
2950          * Add one, for this function and the call to stack_trace_save().
2951          * If regs is set, then these functions will not be in the way.
2952          */
2953 #ifndef CONFIG_UNWINDER_ORC
2954         if (!regs)
2955                 skip++;
2956 #endif
2957
2958         preempt_disable_notrace();
2959
2960         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961
2962         /* This should never happen. If it does, yell once and skip */
2963         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964                 goto out;
2965
2966         /*
2967          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968          * interrupt will either see the value pre-increment or post-
2969          * increment. If the interrupt happens pre-increment, it will have
2970          * restored the counter when it returns. We just need a barrier to
2971          * keep gcc from moving things around.
2972          */
2973         barrier();
2974
2975         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976         size = ARRAY_SIZE(fstack->calls);
2977
2978         if (regs) {
2979                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980                                                    size, skip);
2981         } else {
2982                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2983         }
2984
2985 #ifdef CONFIG_DYNAMIC_FTRACE
2986         /* Mark entry of stack trace as trampoline code */
2987         if (tr->ops && tr->ops->trampoline) {
2988                 unsigned long tramp_start = tr->ops->trampoline;
2989                 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2990                 unsigned long *calls = fstack->calls;
2991
2992                 for (int i = 0; i < nr_entries; i++) {
2993                         if (calls[i] >= tramp_start && calls[i] < tramp_end)
2994                                 calls[i] = FTRACE_TRAMPOLINE_MARKER;
2995                 }
2996         }
2997 #endif
2998
2999         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3000                                     struct_size(entry, caller, nr_entries),
3001                                     trace_ctx);
3002         if (!event)
3003                 goto out;
3004         entry = ring_buffer_event_data(event);
3005
3006         entry->size = nr_entries;
3007         memcpy(&entry->caller, fstack->calls,
3008                flex_array_size(entry, caller, nr_entries));
3009
3010         __buffer_unlock_commit(buffer, event);
3011
3012  out:
3013         /* Again, don't let gcc optimize things here */
3014         barrier();
3015         __this_cpu_dec(ftrace_stack_reserve);
3016         preempt_enable_notrace();
3017
3018 }
3019
3020 static inline void ftrace_trace_stack(struct trace_array *tr,
3021                                       struct trace_buffer *buffer,
3022                                       unsigned int trace_ctx,
3023                                       int skip, struct pt_regs *regs)
3024 {
3025         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3026                 return;
3027
3028         __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3029 }
3030
3031 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3032                    int skip)
3033 {
3034         struct trace_buffer *buffer = tr->array_buffer.buffer;
3035
3036         if (rcu_is_watching()) {
3037                 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038                 return;
3039         }
3040
3041         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3042                 return;
3043
3044         /*
3045          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3046          * but if the above rcu_is_watching() failed, then the NMI
3047          * triggered someplace critical, and ct_irq_enter() should
3048          * not be called from NMI.
3049          */
3050         if (unlikely(in_nmi()))
3051                 return;
3052
3053         ct_irq_enter_irqson();
3054         __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3055         ct_irq_exit_irqson();
3056 }
3057
3058 /**
3059  * trace_dump_stack - record a stack back trace in the trace buffer
3060  * @skip: Number of functions to skip (helper handlers)
3061  */
3062 void trace_dump_stack(int skip)
3063 {
3064         if (tracing_disabled || tracing_selftest_running)
3065                 return;
3066
3067 #ifndef CONFIG_UNWINDER_ORC
3068         /* Skip 1 to skip this function. */
3069         skip++;
3070 #endif
3071         __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3072                                 tracing_gen_ctx(), skip, NULL);
3073 }
3074 EXPORT_SYMBOL_GPL(trace_dump_stack);
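
/*
 * Example (illustrative): dropping
 *
 *	trace_dump_stack(0);
 *
 * into suspect kernel code records the current kernel stack back trace in
 * the trace buffer (rather than on the console), where it shows up alongside
 * the surrounding events in the "trace" output.
 */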
3075
3076 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3077 static DEFINE_PER_CPU(int, user_stack_count);
3078
3079 static void
3080 ftrace_trace_userstack(struct trace_array *tr,
3081                        struct trace_buffer *buffer, unsigned int trace_ctx)
3082 {
3083         struct ring_buffer_event *event;
3084         struct userstack_entry *entry;
3085
3086         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087                 return;
3088
3089         /*
3090          * NMIs cannot handle page faults, even with fixups.
3091          * Saving the user stack can (and often does) fault.
3092          */
3093         if (unlikely(in_nmi()))
3094                 return;
3095
3096         /*
3097          * prevent recursion, since the user stack tracing may
3098          * trigger other kernel events.
3099          */
3100         preempt_disable();
3101         if (__this_cpu_read(user_stack_count))
3102                 goto out;
3103
3104         __this_cpu_inc(user_stack_count);
3105
3106         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107                                             sizeof(*entry), trace_ctx);
3108         if (!event)
3109                 goto out_drop_count;
3110         entry   = ring_buffer_event_data(event);
3111
3112         entry->tgid             = current->tgid;
3113         memset(&entry->caller, 0, sizeof(entry->caller));
3114
3115         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116         __buffer_unlock_commit(buffer, event);
3117
3118  out_drop_count:
3119         __this_cpu_dec(user_stack_count);
3120  out:
3121         preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125                                    struct trace_buffer *buffer,
3126                                    unsigned int trace_ctx)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130
3131 #endif /* CONFIG_STACKTRACE */
3132
3133 static inline void
3134 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3135                           unsigned long long delta)
3136 {
3137         entry->bottom_delta_ts = delta & U32_MAX;
3138         entry->top_delta_ts = (delta >> 32);
3139 }
3140
3141 void trace_last_func_repeats(struct trace_array *tr,
3142                              struct trace_func_repeats *last_info,
3143                              unsigned int trace_ctx)
3144 {
3145         struct trace_buffer *buffer = tr->array_buffer.buffer;
3146         struct func_repeats_entry *entry;
3147         struct ring_buffer_event *event;
3148         u64 delta;
3149
3150         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3151                                             sizeof(*entry), trace_ctx);
3152         if (!event)
3153                 return;
3154
3155         delta = ring_buffer_event_time_stamp(buffer, event) -
3156                 last_info->ts_last_call;
3157
3158         entry = ring_buffer_event_data(event);
3159         entry->ip = last_info->ip;
3160         entry->parent_ip = last_info->parent_ip;
3161         entry->count = last_info->count;
3162         func_repeats_set_delta_ts(entry, delta);
3163
3164         __buffer_unlock_commit(buffer, event);
3165 }
3166
3167 /* created for use with alloc_percpu */
3168 struct trace_buffer_struct {
3169         int nesting;
3170         char buffer[4][TRACE_BUF_SIZE];
3171 };
3172
3173 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3174
3175 /*
3176  * This allows for lockless recording.  If we're nested too deeply, then
3177  * this returns NULL.
3178  */
3179 static char *get_trace_buf(void)
3180 {
3181         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3182
3183         if (!trace_percpu_buffer || buffer->nesting >= 4)
3184                 return NULL;
3185
3186         buffer->nesting++;
3187
3188         /* Interrupts must see nesting incremented before we use the buffer */
3189         barrier();
3190         return &buffer->buffer[buffer->nesting - 1][0];
3191 }
3192
3193 static void put_trace_buf(void)
3194 {
3195         /* Don't let the decrement of nesting leak before this */
3196         barrier();
3197         this_cpu_dec(trace_percpu_buffer->nesting);
3198 }
3199
3200 static int alloc_percpu_trace_buffer(void)
3201 {
3202         struct trace_buffer_struct __percpu *buffers;
3203
3204         if (trace_percpu_buffer)
3205                 return 0;
3206
3207         buffers = alloc_percpu(struct trace_buffer_struct);
3208         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3209                 return -ENOMEM;
3210
3211         trace_percpu_buffer = buffers;
3212         return 0;
3213 }
3214
3215 static int buffers_allocated;
3216
3217 void trace_printk_init_buffers(void)
3218 {
3219         if (buffers_allocated)
3220                 return;
3221
3222         if (alloc_percpu_trace_buffer())
3223                 return;
3224
3225         /* trace_printk() is for debug use only. Don't use it in production. */
3226
3227         pr_warn("\n");
3228         pr_warn("**********************************************************\n");
3229         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3230         pr_warn("**                                                      **\n");
3231         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3232         pr_warn("**                                                      **\n");
3233         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3234         pr_warn("** unsafe for production use.                           **\n");
3235         pr_warn("**                                                      **\n");
3236         pr_warn("** If you see this message and you are not debugging    **\n");
3237         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3238         pr_warn("**                                                      **\n");
3239         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3240         pr_warn("**********************************************************\n");
3241
3242         /* Expand the buffers to set size */
3243         tracing_update_buffers(&global_trace);
3244
3245         buffers_allocated = 1;
3246
3247         /*
3248          * trace_printk_init_buffers() can be called by modules.
3249          * If that happens, then we need to start cmdline recording
3250          * directly here. If the global_trace.buffer is already
3251          * allocated here, then this was called by module code.
3252          */
3253         if (global_trace.array_buffer.buffer)
3254                 tracing_start_cmdline_record();
3255 }
3256 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3257
3258 void trace_printk_start_comm(void)
3259 {
3260         /* Start tracing comms if trace printk is set */
3261         if (!buffers_allocated)
3262                 return;
3263         tracing_start_cmdline_record();
3264 }
3265
3266 static void trace_printk_start_stop_comm(int enabled)
3267 {
3268         if (!buffers_allocated)
3269                 return;
3270
3271         if (enabled)
3272                 tracing_start_cmdline_record();
3273         else
3274                 tracing_stop_cmdline_record();
3275 }
3276
3277 /**
3278  * trace_vbprintk - write binary msg to tracing buffer
3279  * @ip:    The address of the caller
3280  * @fmt:   The string format to write to the buffer
3281  * @args:  Arguments for @fmt
3282  */
3283 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3284 {
3285         struct ring_buffer_event *event;
3286         struct trace_buffer *buffer;
3287         struct trace_array *tr = READ_ONCE(printk_trace);
3288         struct bprint_entry *entry;
3289         unsigned int trace_ctx;
3290         char *tbuffer;
3291         int len = 0, size;
3292
3293         if (!printk_binsafe(tr))
3294                 return trace_vprintk(ip, fmt, args);
3295
3296         if (unlikely(tracing_selftest_running || tracing_disabled))
3297                 return 0;
3298
3299         /* Don't pollute graph traces with trace_vprintk internals */
3300         pause_graph_tracing();
3301
3302         trace_ctx = tracing_gen_ctx();
3303         preempt_disable_notrace();
3304
3305         tbuffer = get_trace_buf();
3306         if (!tbuffer) {
3307                 len = 0;
3308                 goto out_nobuffer;
3309         }
3310
3311         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3312
3313         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3314                 goto out_put;
3315
3316         size = sizeof(*entry) + sizeof(u32) * len;
3317         buffer = tr->array_buffer.buffer;
3318         ring_buffer_nest_start(buffer);
3319         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3320                                             trace_ctx);
3321         if (!event)
3322                 goto out;
3323         entry = ring_buffer_event_data(event);
3324         entry->ip                       = ip;
3325         entry->fmt                      = fmt;
3326
3327         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3328         __buffer_unlock_commit(buffer, event);
3329         ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3330
3331 out:
3332         ring_buffer_nest_end(buffer);
3333 out_put:
3334         put_trace_buf();
3335
3336 out_nobuffer:
3337         preempt_enable_notrace();
3338         unpause_graph_tracing();
3339
3340         return len;
3341 }
3342 EXPORT_SYMBOL_GPL(trace_vbprintk);
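/*
 * Illustrative usage sketch: trace_vbprintk() takes a va_list, so a
 * printf-style caller (like the trace_printk() machinery) wraps it
 * roughly as below.  "example_bprintk" is a hypothetical name:
 */
__printf(2, 3)
static __maybe_unused int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);
        return ret;
}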
3343
3344 __printf(3, 0)
3345 static int
3346 __trace_array_vprintk(struct trace_buffer *buffer,
3347                       unsigned long ip, const char *fmt, va_list args)
3348 {
3349         struct ring_buffer_event *event;
3350         int len = 0, size;
3351         struct print_entry *entry;
3352         unsigned int trace_ctx;
3353         char *tbuffer;
3354
3355         if (tracing_disabled)
3356                 return 0;
3357
3358         /* Don't pollute graph traces with trace_vprintk internals */
3359         pause_graph_tracing();
3360
3361         trace_ctx = tracing_gen_ctx();
3362         preempt_disable_notrace();
3363
3364
3365         tbuffer = get_trace_buf();
3366         if (!tbuffer) {
3367                 len = 0;
3368                 goto out_nobuffer;
3369         }
3370
3371         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3372
3373         size = sizeof(*entry) + len + 1;
3374         ring_buffer_nest_start(buffer);
3375         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3376                                             trace_ctx);
3377         if (!event)
3378                 goto out;
3379         entry = ring_buffer_event_data(event);
3380         entry->ip = ip;
3381
3382         memcpy(&entry->buf, tbuffer, len + 1);
3383         __buffer_unlock_commit(buffer, event);
3384         ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3385
3386 out:
3387         ring_buffer_nest_end(buffer);
3388         put_trace_buf();
3389
3390 out_nobuffer:
3391         preempt_enable_notrace();
3392         unpause_graph_tracing();
3393
3394         return len;
3395 }
3396
3397 __printf(3, 0)
3398 int trace_array_vprintk(struct trace_array *tr,
3399                         unsigned long ip, const char *fmt, va_list args)
3400 {
3401         if (tracing_selftest_running && tr == &global_trace)
3402                 return 0;
3403
3404         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3405 }
3406
3407 /**
3408  * trace_array_printk - Print a message to a specific instance
3409  * @tr: The instance trace_array descriptor
3410  * @ip: The instruction pointer that this is called from.
3411  * @fmt: The format to print (printf format)
3412  *
3413  * If a subsystem sets up its own instance, it has the right to
3414  * printk strings into its tracing instance buffer using this
3415  * function. Note, this function will not write into the top level
3416  * buffer (use trace_printk() for that), as writing into the top level
3417  * buffer should only have events that can be individually disabled.
3418  * trace_printk() is only used for debugging a kernel, and should never
3419  * be incorporated into normal use.
3420  *
3421  * trace_array_printk() can be used, as it will not add noise to the
3422  * top level tracing buffer.
3423  *
3424  * Note, trace_array_init_printk() must be called on @tr before this
3425  * can be used.
3426  */
3427 __printf(3, 0)
3428 int trace_array_printk(struct trace_array *tr,
3429                        unsigned long ip, const char *fmt, ...)
3430 {
3431         int ret;
3432         va_list ap;
3433
3434         if (!tr)
3435                 return -ENOENT;
3436
3437         /* This is only allowed for created instances */
3438         if (tr == &global_trace)
3439                 return 0;
3440
3441         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3442                 return 0;
3443
3444         va_start(ap, fmt);
3445         ret = trace_array_vprintk(tr, ip, fmt, ap);
3446         va_end(ap);
3447         return ret;
3448 }
3449 EXPORT_SYMBOL_GPL(trace_array_printk);
3450
3451 /**
3452  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3453  * @tr: The trace array to initialize the buffers for
3454  *
3455  * As trace_array_printk() only writes into instances, they are OK to
3456  * have in the kernel (unlike trace_printk()). This needs to be called
3457  * before trace_array_printk() can be used on a trace_array.
3458  */
3459 int trace_array_init_printk(struct trace_array *tr)
3460 {
3461         if (!tr)
3462                 return -ENOENT;
3463
3464         /* This is only allowed for created instances */
3465         if (tr == &global_trace)
3466                 return -EINVAL;
3467
3468         return alloc_percpu_trace_buffer();
3469 }
3470 EXPORT_SYMBOL_GPL(trace_array_init_printk);
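/*
 * Illustrative usage sketch: a subsystem that owns a trace instance
 * initializes the percpu buffers once and may then print into its own
 * buffer.  The trace_array pointer is assumed to have been obtained
 * elsewhere (e.g. from trace_array_get_by_name()); "example_subsys" is
 * a hypothetical name:
 */
static __maybe_unused int example_subsys_trace_setup(struct trace_array *tr)
{
        int ret;

        ret = trace_array_init_printk(tr);
        if (ret)
                return ret;

        trace_array_printk(tr, _THIS_IP_, "example_subsys: instance ready\n");
        return 0;
}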
3471
3472 __printf(3, 4)
3473 int trace_array_printk_buf(struct trace_buffer *buffer,
3474                            unsigned long ip, const char *fmt, ...)
3475 {
3476         int ret;
3477         va_list ap;
3478
3479         if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3480                 return 0;
3481
3482         va_start(ap, fmt);
3483         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3484         va_end(ap);
3485         return ret;
3486 }
3487
3488 __printf(2, 0)
3489 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3490 {
3491         return trace_array_vprintk(printk_trace, ip, fmt, args);
3492 }
3493 EXPORT_SYMBOL_GPL(trace_vprintk);
3494
3495 static void trace_iterator_increment(struct trace_iterator *iter)
3496 {
3497         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3498
3499         iter->idx++;
3500         if (buf_iter)
3501                 ring_buffer_iter_advance(buf_iter);
3502 }
3503
3504 static struct trace_entry *
3505 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3506                 unsigned long *lost_events)
3507 {
3508         struct ring_buffer_event *event;
3509         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3510
3511         if (buf_iter) {
3512                 event = ring_buffer_iter_peek(buf_iter, ts);
3513                 if (lost_events)
3514                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3515                                 (unsigned long)-1 : 0;
3516         } else {
3517                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3518                                          lost_events);
3519         }
3520
3521         if (event) {
3522                 iter->ent_size = ring_buffer_event_length(event);
3523                 return ring_buffer_event_data(event);
3524         }
3525         iter->ent_size = 0;
3526         return NULL;
3527 }
3528
3529 static struct trace_entry *
3530 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3531                   unsigned long *missing_events, u64 *ent_ts)
3532 {
3533         struct trace_buffer *buffer = iter->array_buffer->buffer;
3534         struct trace_entry *ent, *next = NULL;
3535         unsigned long lost_events = 0, next_lost = 0;
3536         int cpu_file = iter->cpu_file;
3537         u64 next_ts = 0, ts;
3538         int next_cpu = -1;
3539         int next_size = 0;
3540         int cpu;
3541
3542         /*
3543          * If we are in a per_cpu trace file, don't bother iterating over
3544          * all CPUs; peek at that CPU directly.
3545          */
3546         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3547                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3548                         return NULL;
3549                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3550                 if (ent_cpu)
3551                         *ent_cpu = cpu_file;
3552
3553                 return ent;
3554         }
3555
3556         for_each_tracing_cpu(cpu) {
3557
3558                 if (ring_buffer_empty_cpu(buffer, cpu))
3559                         continue;
3560
3561                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3562
3563                 /*
3564                  * Pick the entry with the smallest timestamp:
3565                  */
3566                 if (ent && (!next || ts < next_ts)) {
3567                         next = ent;
3568                         next_cpu = cpu;
3569                         next_ts = ts;
3570                         next_lost = lost_events;
3571                         next_size = iter->ent_size;
3572                 }
3573         }
3574
3575         iter->ent_size = next_size;
3576
3577         if (ent_cpu)
3578                 *ent_cpu = next_cpu;
3579
3580         if (ent_ts)
3581                 *ent_ts = next_ts;
3582
3583         if (missing_events)
3584                 *missing_events = next_lost;
3585
3586         return next;
3587 }
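/*
 * Note on the loop above: when all CPUs are being read this is in effect
 * a k-way merge.  Each per-CPU ring buffer is already ordered by
 * timestamp, so returning the smallest timestamp across CPUs yields a
 * globally ordered stream of entries.
 */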
3588
3589 #define STATIC_FMT_BUF_SIZE     128
3590 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3591
3592 char *trace_iter_expand_format(struct trace_iterator *iter)
3593 {
3594         char *tmp;
3595
3596         /*
3597          * iter->tr is NULL when used with tp_printk, which makes
3598          * this get called where it is not safe to call krealloc().
3599          */
3600         if (!iter->tr || iter->fmt == static_fmt_buf)
3601                 return NULL;
3602
3603         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3604                        GFP_KERNEL);
3605         if (tmp) {
3606                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3607                 iter->fmt = tmp;
3608         }
3609
3610         return tmp;
3611 }
3612
3613 /* Returns true if the string is safe to dereference from an event */
3614 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3615 {
3616         unsigned long addr = (unsigned long)str;
3617         struct trace_event *trace_event;
3618         struct trace_event_call *event;
3619
3620         /* OK if part of the event data */
3621         if ((addr >= (unsigned long)iter->ent) &&
3622             (addr < (unsigned long)iter->ent + iter->ent_size))
3623                 return true;
3624
3625         /* OK if part of the temp seq buffer */
3626         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3627             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3628                 return true;
3629
3630         /* Core rodata can not be freed */
3631         if (is_kernel_rodata(addr))
3632                 return true;
3633
3634         if (trace_is_tracepoint_string(str))
3635                 return true;
3636
3637         /*
3638          * Now this could be a module event, referencing core module
3639          * data, which is OK.
3640          */
3641         if (!iter->ent)
3642                 return false;
3643
3644         trace_event = ftrace_find_event(iter->ent->type);
3645         if (!trace_event)
3646                 return false;
3647
3648         event = container_of(trace_event, struct trace_event_call, event);
3649         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3650                 return false;
3651
3652         /* Would rather have rodata, but this will suffice */
3653         if (within_module_core(addr, event->module))
3654                 return true;
3655
3656         return false;
3657 }
3658
3659 /**
3660  * ignore_event - Check dereferenced fields while writing to the seq buffer
3661  * @iter: The iterator that holds the seq buffer and the event being printed
3662  *
3663  * At boot up, test_event_printk() will flag any event that dereferences
3664  * a string with "%s" that does not exist in the ring buffer. It may still
3665  * be valid, as the string may point to a static string in the kernel
3666  * rodata that never gets freed. But if the string pointer is pointing
3667  * to something that was allocated, there's a chance that it can be freed
3668  * by the time the user reads the trace. This would cause a bad memory
3669  * access by the kernel and possibly crash the system.
3670  *
3671  * This function will check if the event has any fields flagged as needing
3672  * to be checked at runtime and perform those checks.
3673  *
3674  * If it is found that a field is unsafe, it will write into the @iter->seq
3675  * a message stating what was found to be unsafe.
3676  *
3677  * @return: true if the event is unsafe and should be ignored,
3678  *          false otherwise.
3679  */
3680 bool ignore_event(struct trace_iterator *iter)
3681 {
3682         struct ftrace_event_field *field;
3683         struct trace_event *trace_event;
3684         struct trace_event_call *event;
3685         struct list_head *head;
3686         struct trace_seq *seq;
3687         const void *ptr;
3688
3689         trace_event = ftrace_find_event(iter->ent->type);
3690
3691         seq = &iter->seq;
3692
3693         if (!trace_event) {
3694                 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3695                 return true;
3696         }
3697
3698         event = container_of(trace_event, struct trace_event_call, event);
3699         if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3700                 return false;
3701
3702         head = trace_get_fields(event);
3703         if (!head) {
3704                 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3705                                  trace_event_name(event));
3706                 return true;
3707         }
3708
3709         /* Offsets are from the iter->ent that points to the raw event */
3710         ptr = iter->ent;
3711
3712         list_for_each_entry(field, head, link) {
3713                 const char *str;
3714                 bool good;
3715
3716                 if (!field->needs_test)
3717                         continue;
3718
3719                 str = *(const char **)(ptr + field->offset);
3720
3721                 good = trace_safe_str(iter, str);
3722
3723                 /*
3724                  * If you hit this warning, it is likely that the
3725                  * trace event in question used %s on a string that
3726                  * was saved at the time of the event, but may not be
3727                  * around when the trace is read. Use __string(),
3728                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3729                  * instead. See samples/trace_events/trace-events-sample.h
3730                  * for reference.
3731                  */
3732                 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3733                               trace_event_name(event), field->name)) {
3734                         trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3735                                          trace_event_name(event), field->name);
3736                         return true;
3737                 }
3738         }
3739         return false;
3740 }
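/*
 * Illustrative sketch of the fix the warning above asks for: copy the
 * string into the event with the __string()/__assign_str()/__get_str()
 * helpers instead of recording a bare pointer.  Roughly (see
 * samples/trace_events/trace-events-sample.h; older kernels pass the
 * source string as a second argument to __assign_str()):
 *
 *      TRACE_EVENT(example_event,
 *              TP_PROTO(const char *name),
 *              TP_ARGS(name),
 *              TP_STRUCT__entry(
 *                      __string(name, name)
 *              ),
 *              TP_fast_assign(
 *                      __assign_str(name);
 *              ),
 *              TP_printk("name=%s", __get_str(name))
 *      );
 */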
3741
3742 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3743 {
3744         const char *p, *new_fmt;
3745         char *q;
3746
3747         if (WARN_ON_ONCE(!fmt))
3748                 return fmt;
3749
3750         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3751                 return fmt;
3752
3753         p = fmt;
3754         new_fmt = q = iter->fmt;
3755         while (*p) {
3756                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3757                         if (!trace_iter_expand_format(iter))
3758                                 return fmt;
3759
3760                         q += iter->fmt - new_fmt;
3761                         new_fmt = iter->fmt;
3762                 }
3763
3764                 *q++ = *p++;
3765
3766                 /* Replace %p with %px */
3767                 if (p[-1] == '%') {
3768                         if (p[0] == '%') {
3769                                 *q++ = *p++;
3770                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3771                                 *q++ = *p++;
3772                                 *q++ = 'x';
3773                         }
3774                 }
3775         }
3776         *q = '\0';
3777
3778         return new_fmt;
3779 }
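/*
 * Example of the rewrite done above: with TRACE_ITER_HASH_PTR cleared,
 * a format such as
 *
 *      "ip=%p func=%pS count=%d"
 *
 * becomes
 *
 *      "ip=%px func=%pS count=%d"
 *
 * Only a bare "%p" (not "%%p", and not extensions like "%pS" where an
 * alphanumeric follows) gains the 'x', so plain pointers are printed
 * unhashed while everything else is left alone.
 */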
3780
3781 #define STATIC_TEMP_BUF_SIZE    128
3782 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3783
3784 /* Find the next real entry, without updating the iterator itself */
3785 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3786                                           int *ent_cpu, u64 *ent_ts)
3787 {
3788         /* __find_next_entry will reset ent_size */
3789         int ent_size = iter->ent_size;
3790         struct trace_entry *entry;
3791
3792         /*
3793          * If called from ftrace_dump(), then the iter->temp buffer
3794          * will be the static_temp_buf and not created from kmalloc.
3795          * If the entry size is greater than the buffer, we can
3796          * not save it. Just return NULL in that case. This is only
3797          * used to add markers when two consecutive events' time
3798          * stamps have a large delta. See trace_print_lat_context()
3799          */
3800         if (iter->temp == static_temp_buf &&
3801             STATIC_TEMP_BUF_SIZE < ent_size)
3802                 return NULL;
3803
3804         /*
3805          * The __find_next_entry() may call peek_next_entry(), which may
3806          * call ring_buffer_peek() that may make the contents of iter->ent
3807          * undefined. Need to copy iter->ent now.
3808          */
3809         if (iter->ent && iter->ent != iter->temp) {
3810                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3811                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3812                         void *temp;
3813                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3814                         if (!temp)
3815                                 return NULL;
3816                         kfree(iter->temp);
3817                         iter->temp = temp;
3818                         iter->temp_size = iter->ent_size;
3819                 }
3820                 memcpy(iter->temp, iter->ent, iter->ent_size);
3821                 iter->ent = iter->temp;
3822         }
3823         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3824         /* Put back the original ent_size */
3825         iter->ent_size = ent_size;
3826
3827         return entry;
3828 }
3829
3830 /* Find the next real entry, and increment the iterator to the next entry */
3831 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3832 {
3833         iter->ent = __find_next_entry(iter, &iter->cpu,
3834                                       &iter->lost_events, &iter->ts);
3835
3836         if (iter->ent)
3837                 trace_iterator_increment(iter);
3838
3839         return iter->ent ? iter : NULL;
3840 }
3841
3842 static void trace_consume(struct trace_iterator *iter)
3843 {
3844         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3845                             &iter->lost_events);
3846 }
3847
3848 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3849 {
3850         struct trace_iterator *iter = m->private;
3851         int i = (int)*pos;
3852         void *ent;
3853
3854         WARN_ON_ONCE(iter->leftover);
3855
3856         (*pos)++;
3857
3858         /* can't go backwards */
3859         if (iter->idx > i)
3860                 return NULL;
3861
3862         if (iter->idx < 0)
3863                 ent = trace_find_next_entry_inc(iter);
3864         else
3865                 ent = iter;
3866
3867         while (ent && iter->idx < i)
3868                 ent = trace_find_next_entry_inc(iter);
3869
3870         iter->pos = *pos;
3871
3872         return ent;
3873 }
3874
3875 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3876 {
3877         struct ring_buffer_iter *buf_iter;
3878         unsigned long entries = 0;
3879         u64 ts;
3880
3881         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3882
3883         buf_iter = trace_buffer_iter(iter, cpu);
3884         if (!buf_iter)
3885                 return;
3886
3887         ring_buffer_iter_reset(buf_iter);
3888
3889         /*
3890          * We could have the case with the max latency tracers
3891          * that a reset never took place on a cpu. This is evident
3892          * by the timestamp being before the start of the buffer.
3893          */
3894         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3895                 if (ts >= iter->array_buffer->time_start)
3896                         break;
3897                 entries++;
3898                 ring_buffer_iter_advance(buf_iter);
3899                 /* This could be a big loop */
3900                 cond_resched();
3901         }
3902
3903         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3904 }
3905
3906 /*
3907  * The current tracer is copied into the iterator to avoid holding
3908  * a global lock all around.
3909  */
3910 static void *s_start(struct seq_file *m, loff_t *pos)
3911 {
3912         struct trace_iterator *iter = m->private;
3913         struct trace_array *tr = iter->tr;
3914         int cpu_file = iter->cpu_file;
3915         void *p = NULL;
3916         loff_t l = 0;
3917         int cpu;
3918
3919         mutex_lock(&trace_types_lock);
3920         if (unlikely(tr->current_trace != iter->trace)) {
3921                 /* Close iter->trace before switching to the new current tracer */
3922                 if (iter->trace->close)
3923                         iter->trace->close(iter);
3924                 iter->trace = tr->current_trace;
3925                 /* Reopen the new current tracer */
3926                 if (iter->trace->open)
3927                         iter->trace->open(iter);
3928         }
3929         mutex_unlock(&trace_types_lock);
3930
3931 #ifdef CONFIG_TRACER_MAX_TRACE
3932         if (iter->snapshot && iter->trace->use_max_tr)
3933                 return ERR_PTR(-EBUSY);
3934 #endif
3935
3936         if (*pos != iter->pos) {
3937                 iter->ent = NULL;
3938                 iter->cpu = 0;
3939                 iter->idx = -1;
3940
3941                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3942                         for_each_tracing_cpu(cpu)
3943                                 tracing_iter_reset(iter, cpu);
3944                 } else
3945                         tracing_iter_reset(iter, cpu_file);
3946
3947                 iter->leftover = 0;
3948                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3949                         ;
3950
3951         } else {
3952                 /*
3953                  * If we overflowed the seq_file before, then we want
3954                  * to just reuse the trace_seq buffer again.
3955                  */
3956                 if (iter->leftover)
3957                         p = iter;
3958                 else {
3959                         l = *pos - 1;
3960                         p = s_next(m, p, &l);
3961                 }
3962         }
3963
3964         trace_event_read_lock();
3965         trace_access_lock(cpu_file);
3966         return p;
3967 }
3968
3969 static void s_stop(struct seq_file *m, void *p)
3970 {
3971         struct trace_iterator *iter = m->private;
3972
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974         if (iter->snapshot && iter->trace->use_max_tr)
3975                 return;
3976 #endif
3977
3978         trace_access_unlock(iter->cpu_file);
3979         trace_event_read_unlock();
3980 }
3981
3982 static void
3983 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3984                       unsigned long *entries, int cpu)
3985 {
3986         unsigned long count;
3987
3988         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3989         /*
3990          * If this buffer has skipped entries, then we hold all
3991          * entries for the trace and we need to ignore the
3992          * ones before the time stamp.
3993          */
3994         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3995                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3996                 /* total is the same as the entries */
3997                 *total = count;
3998         } else
3999                 *total = count +
4000                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4001         *entries = count;
4002 }
4003
4004 static void
4005 get_total_entries(struct array_buffer *buf,
4006                   unsigned long *total, unsigned long *entries)
4007 {
4008         unsigned long t, e;
4009         int cpu;
4010
4011         *total = 0;
4012         *entries = 0;
4013
4014         for_each_tracing_cpu(cpu) {
4015                 get_total_entries_cpu(buf, &t, &e, cpu);
4016                 *total += t;
4017                 *entries += e;
4018         }
4019 }
4020
4021 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4022 {
4023         unsigned long total, entries;
4024
4025         if (!tr)
4026                 tr = &global_trace;
4027
4028         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4029
4030         return entries;
4031 }
4032
4033 unsigned long trace_total_entries(struct trace_array *tr)
4034 {
4035         unsigned long total, entries;
4036
4037         if (!tr)
4038                 tr = &global_trace;
4039
4040         get_total_entries(&tr->array_buffer, &total, &entries);
4041
4042         return entries;
4043 }
4044
4045 static void print_lat_help_header(struct seq_file *m)
4046 {
4047         seq_puts(m, "#                    _------=> CPU#            \n"
4048                     "#                   / _-----=> irqs-off/BH-disabled\n"
4049                     "#                  | / _----=> need-resched    \n"
4050                     "#                  || / _---=> hardirq/softirq \n"
4051                     "#                  ||| / _--=> preempt-depth   \n"
4052                     "#                  |||| / _-=> migrate-disable \n"
4053                     "#                  ||||| /     delay           \n"
4054                     "#  cmd     pid     |||||| time  |   caller     \n"
4055                     "#     \\   /        ||||||  \\    |    /       \n");
4056 }
4057
4058 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4059 {
4060         unsigned long total;
4061         unsigned long entries;
4062
4063         get_total_entries(buf, &total, &entries);
4064         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4065                    entries, total, num_online_cpus());
4066         seq_puts(m, "#\n");
4067 }
4068
4069 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4070                                    unsigned int flags)
4071 {
4072         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4073
4074         print_event_info(buf, m);
4075
4076         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4077         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4078 }
4079
4080 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4081                                        unsigned int flags)
4082 {
4083         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4084         static const char space[] = "            ";
4085         int prec = tgid ? 12 : 2;
4086
4087         print_event_info(buf, m);
4088
4089         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4090         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4091         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4092         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4093         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4094         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4095         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4096         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4097 }
4098
4099 void
4100 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4101 {
4102         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4103         struct array_buffer *buf = iter->array_buffer;
4104         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4105         struct tracer *type = iter->trace;
4106         unsigned long entries;
4107         unsigned long total;
4108         const char *name = type->name;
4109
4110         get_total_entries(buf, &total, &entries);
4111
4112         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4113                    name, init_utsname()->release);
4114         seq_puts(m, "# -----------------------------------"
4115                  "---------------------------------\n");
4116         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4117                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4118                    nsecs_to_usecs(data->saved_latency),
4119                    entries,
4120                    total,
4121                    buf->cpu,
4122                    preempt_model_none()      ? "server" :
4123                    preempt_model_voluntary() ? "desktop" :
4124                    preempt_model_full()      ? "preempt" :
4125                    preempt_model_rt()        ? "preempt_rt" :
4126                    "unknown",
4127                    /* These are reserved for later use */
4128                    0, 0, 0, 0);
4129 #ifdef CONFIG_SMP
4130         seq_printf(m, " #P:%d)\n", num_online_cpus());
4131 #else
4132         seq_puts(m, ")\n");
4133 #endif
4134         seq_puts(m, "#    -----------------\n");
4135         seq_printf(m, "#    | task: %.16s-%d "
4136                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4137                    data->comm, data->pid,
4138                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4139                    data->policy, data->rt_priority);
4140         seq_puts(m, "#    -----------------\n");
4141
4142         if (data->critical_start) {
4143                 seq_puts(m, "#  => started at: ");
4144                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4145                 trace_print_seq(m, &iter->seq);
4146                 seq_puts(m, "\n#  => ended at:   ");
4147                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4148                 trace_print_seq(m, &iter->seq);
4149                 seq_puts(m, "\n#\n");
4150         }
4151
4152         seq_puts(m, "#\n");
4153 }
4154
4155 static void test_cpu_buff_start(struct trace_iterator *iter)
4156 {
4157         struct trace_seq *s = &iter->seq;
4158         struct trace_array *tr = iter->tr;
4159
4160         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4161                 return;
4162
4163         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4164                 return;
4165
4166         if (cpumask_available(iter->started) &&
4167             cpumask_test_cpu(iter->cpu, iter->started))
4168                 return;
4169
4170         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4171                 return;
4172
4173         if (cpumask_available(iter->started))
4174                 cpumask_set_cpu(iter->cpu, iter->started);
4175
4176         /* Don't print started cpu buffer for the first entry of the trace */
4177         if (iter->idx > 1)
4178                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4179                                 iter->cpu);
4180 }
4181
4182 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4183 {
4184         struct trace_array *tr = iter->tr;
4185         struct trace_seq *s = &iter->seq;
4186         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4187         struct trace_entry *entry;
4188         struct trace_event *event;
4189
4190         entry = iter->ent;
4191
4192         test_cpu_buff_start(iter);
4193
4194         event = ftrace_find_event(entry->type);
4195
4196         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4197                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4198                         trace_print_lat_context(iter);
4199                 else
4200                         trace_print_context(iter);
4201         }
4202
4203         if (trace_seq_has_overflowed(s))
4204                 return TRACE_TYPE_PARTIAL_LINE;
4205
4206         if (event) {
4207                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4208                         return print_event_fields(iter, event);
4209                 /*
4210                  * For TRACE_EVENT() events, the print_fmt is not
4211                  * safe to use if the array has delta offsets.
4212                  * Force printing via the fields.
4213                  */
4214                 if ((tr->text_delta || tr->data_delta) &&
4215                     event->type > __TRACE_LAST_TYPE)
4216                         return print_event_fields(iter, event);
4217
4218                 return event->funcs->trace(iter, sym_flags, event);
4219         }
4220
4221         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4222
4223         return trace_handle_return(s);
4224 }
4225
4226 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4227 {
4228         struct trace_array *tr = iter->tr;
4229         struct trace_seq *s = &iter->seq;
4230         struct trace_entry *entry;
4231         struct trace_event *event;
4232
4233         entry = iter->ent;
4234
4235         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4236                 trace_seq_printf(s, "%d %d %llu ",
4237                                  entry->pid, iter->cpu, iter->ts);
4238
4239         if (trace_seq_has_overflowed(s))
4240                 return TRACE_TYPE_PARTIAL_LINE;
4241
4242         event = ftrace_find_event(entry->type);
4243         if (event)
4244                 return event->funcs->raw(iter, 0, event);
4245
4246         trace_seq_printf(s, "%d ?\n", entry->type);
4247
4248         return trace_handle_return(s);
4249 }
4250
4251 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4252 {
4253         struct trace_array *tr = iter->tr;
4254         struct trace_seq *s = &iter->seq;
4255         unsigned char newline = '\n';
4256         struct trace_entry *entry;
4257         struct trace_event *event;
4258
4259         entry = iter->ent;
4260
4261         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4262                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4263                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4264                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4265                 if (trace_seq_has_overflowed(s))
4266                         return TRACE_TYPE_PARTIAL_LINE;
4267         }
4268
4269         event = ftrace_find_event(entry->type);
4270         if (event) {
4271                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4272                 if (ret != TRACE_TYPE_HANDLED)
4273                         return ret;
4274         }
4275
4276         SEQ_PUT_FIELD(s, newline);
4277
4278         return trace_handle_return(s);
4279 }
4280
4281 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4282 {
4283         struct trace_array *tr = iter->tr;
4284         struct trace_seq *s = &iter->seq;
4285         struct trace_entry *entry;
4286         struct trace_event *event;
4287
4288         entry = iter->ent;
4289
4290         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4291                 SEQ_PUT_FIELD(s, entry->pid);
4292                 SEQ_PUT_FIELD(s, iter->cpu);
4293                 SEQ_PUT_FIELD(s, iter->ts);
4294                 if (trace_seq_has_overflowed(s))
4295                         return TRACE_TYPE_PARTIAL_LINE;
4296         }
4297
4298         event = ftrace_find_event(entry->type);
4299         return event ? event->funcs->binary(iter, 0, event) :
4300                 TRACE_TYPE_HANDLED;
4301 }
4302
4303 int trace_empty(struct trace_iterator *iter)
4304 {
4305         struct ring_buffer_iter *buf_iter;
4306         int cpu;
4307
4308         /* If we are looking at one CPU buffer, only check that one */
4309         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4310                 cpu = iter->cpu_file;
4311                 buf_iter = trace_buffer_iter(iter, cpu);
4312                 if (buf_iter) {
4313                         if (!ring_buffer_iter_empty(buf_iter))
4314                                 return 0;
4315                 } else {
4316                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4317                                 return 0;
4318                 }
4319                 return 1;
4320         }
4321
4322         for_each_tracing_cpu(cpu) {
4323                 buf_iter = trace_buffer_iter(iter, cpu);
4324                 if (buf_iter) {
4325                         if (!ring_buffer_iter_empty(buf_iter))
4326                                 return 0;
4327                 } else {
4328                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4329                                 return 0;
4330                 }
4331         }
4332
4333         return 1;
4334 }
4335
4336 /*  Called with trace_event_read_lock() held. */
4337 enum print_line_t print_trace_line(struct trace_iterator *iter)
4338 {
4339         struct trace_array *tr = iter->tr;
4340         unsigned long trace_flags = tr->trace_flags;
4341         enum print_line_t ret;
4342
4343         if (iter->lost_events) {
4344                 if (iter->lost_events == (unsigned long)-1)
4345                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4346                                          iter->cpu);
4347                 else
4348                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4349                                          iter->cpu, iter->lost_events);
4350                 if (trace_seq_has_overflowed(&iter->seq))
4351                         return TRACE_TYPE_PARTIAL_LINE;
4352         }
4353
4354         if (iter->trace && iter->trace->print_line) {
4355                 ret = iter->trace->print_line(iter);
4356                 if (ret != TRACE_TYPE_UNHANDLED)
4357                         return ret;
4358         }
4359
4360         if (iter->ent->type == TRACE_BPUTS &&
4361                         trace_flags & TRACE_ITER_PRINTK &&
4362                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4363                 return trace_print_bputs_msg_only(iter);
4364
4365         if (iter->ent->type == TRACE_BPRINT &&
4366                         trace_flags & TRACE_ITER_PRINTK &&
4367                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4368                 return trace_print_bprintk_msg_only(iter);
4369
4370         if (iter->ent->type == TRACE_PRINT &&
4371                         trace_flags & TRACE_ITER_PRINTK &&
4372                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4373                 return trace_print_printk_msg_only(iter);
4374
4375         if (trace_flags & TRACE_ITER_BIN)
4376                 return print_bin_fmt(iter);
4377
4378         if (trace_flags & TRACE_ITER_HEX)
4379                 return print_hex_fmt(iter);
4380
4381         if (trace_flags & TRACE_ITER_RAW)
4382                 return print_raw_fmt(iter);
4383
4384         return print_trace_fmt(iter);
4385 }
4386
4387 void trace_latency_header(struct seq_file *m)
4388 {
4389         struct trace_iterator *iter = m->private;
4390         struct trace_array *tr = iter->tr;
4391
4392         /* print nothing if the buffers are empty */
4393         if (trace_empty(iter))
4394                 return;
4395
4396         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4397                 print_trace_header(m, iter);
4398
4399         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4400                 print_lat_help_header(m);
4401 }
4402
4403 void trace_default_header(struct seq_file *m)
4404 {
4405         struct trace_iterator *iter = m->private;
4406         struct trace_array *tr = iter->tr;
4407         unsigned long trace_flags = tr->trace_flags;
4408
4409         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4410                 return;
4411
4412         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4413                 /* print nothing if the buffers are empty */
4414                 if (trace_empty(iter))
4415                         return;
4416                 print_trace_header(m, iter);
4417                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4418                         print_lat_help_header(m);
4419         } else {
4420                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4421                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4422                                 print_func_help_header_irq(iter->array_buffer,
4423                                                            m, trace_flags);
4424                         else
4425                                 print_func_help_header(iter->array_buffer, m,
4426                                                        trace_flags);
4427                 }
4428         }
4429 }
4430
4431 static void test_ftrace_alive(struct seq_file *m)
4432 {
4433         if (!ftrace_is_dead())
4434                 return;
4435         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4436                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4437 }
4438
4439 #ifdef CONFIG_TRACER_MAX_TRACE
4440 static void show_snapshot_main_help(struct seq_file *m)
4441 {
4442         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4443                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4444                     "#                      Takes a snapshot of the main buffer.\n"
4445                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4446                     "#                      (Doesn't have to be '2'; works with any number that\n"
4447                     "#                       is not a '0' or '1')\n");
4448 }
4449
4450 static void show_snapshot_percpu_help(struct seq_file *m)
4451 {
4452         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4453 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4454         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4455                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4456 #else
4457         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4458                     "#                     Must use main snapshot file to allocate.\n");
4459 #endif
4460         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4461                     "#                      (Doesn't have to be '2'; works with any number that\n"
4462                     "#                       is not a '0' or '1')\n");
4463 }
4464
4465 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4466 {
4467         if (iter->tr->allocated_snapshot)
4468                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4469         else
4470                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4471
4472         seq_puts(m, "# Snapshot commands:\n");
4473         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4474                 show_snapshot_main_help(m);
4475         else
4476                 show_snapshot_percpu_help(m);
4477 }
4478 #else
4479 /* Should never be called */
4480 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4481 #endif
4482
4483 static int s_show(struct seq_file *m, void *v)
4484 {
4485         struct trace_iterator *iter = v;
4486         int ret;
4487
4488         if (iter->ent == NULL) {
4489                 if (iter->tr) {
4490                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4491                         seq_puts(m, "#\n");
4492                         test_ftrace_alive(m);
4493                 }
4494                 if (iter->snapshot && trace_empty(iter))
4495                         print_snapshot_help(m, iter);
4496                 else if (iter->trace && iter->trace->print_header)
4497                         iter->trace->print_header(m);
4498                 else
4499                         trace_default_header(m);
4500
4501         } else if (iter->leftover) {
4502                 /*
4503                  * If we filled the seq_file buffer earlier, we
4504                  * want to just show it now.
4505                  */
4506                 ret = trace_print_seq(m, &iter->seq);
4507
4508                 /* ret should this time be zero, but you never know */
4509                 iter->leftover = ret;
4510
4511         } else {
4512                 ret = print_trace_line(iter);
4513                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4514                         iter->seq.full = 0;
4515                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4516                 }
4517                 ret = trace_print_seq(m, &iter->seq);
4518                 /*
4519                  * If we overflow the seq_file buffer, then it will
4520                  * ask us for this data again at start up.
4521                  * Use that instead.
4522                  *  ret is 0 if seq_file write succeeded.
4523                  *        -1 otherwise.
4524                  */
4525                 iter->leftover = ret;
4526         }
4527
4528         return 0;
4529 }
4530
4531 /*
4532  * Should be used after trace_array_get(), trace_types_lock
4533  * ensures that i_cdev was already initialized.
4534  */
4535 static inline int tracing_get_cpu(struct inode *inode)
4536 {
4537         if (inode->i_cdev) /* See trace_create_cpu_file() */
4538                 return (long)inode->i_cdev - 1;
4539         return RING_BUFFER_ALL_CPUS;
4540 }
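/*
 * Note: the per-CPU trace files store "cpu + 1" in i_cdev when created
 * (roughly: d_inode(dentry)->i_cdev = (void *)(cpu + 1); in
 * trace_create_cpu_file()), so a NULL i_cdev decodes to
 * RING_BUFFER_ALL_CPUS and anything else decodes back to the CPU number.
 */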
4541
4542 static const struct seq_operations tracer_seq_ops = {
4543         .start          = s_start,
4544         .next           = s_next,
4545         .stop           = s_stop,
4546         .show           = s_show,
4547 };
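/*
 * Note on ordering: the seq_file core drives these callbacks as
 * s_start() -> s_show() -> s_next() -> s_show() -> ... -> s_stop(),
 * so the event-read and per-CPU access locks taken at the end of
 * s_start() are held across a whole batch of records and dropped in
 * s_stop().
 */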
4548
4549 /*
4550  * Note, as iter itself can be allocated and freed in different
4551  * ways, this function is only used to free its content, and not
4552  * the iterator itself. The only requirement on all the allocations
4553  * is that they must zero all fields (kzalloc), as freeing works with
4554  * either allocated content or NULL.
4555  */
4556 static void free_trace_iter_content(struct trace_iterator *iter)
4557 {
4558         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4559         if (iter->fmt != static_fmt_buf)
4560                 kfree(iter->fmt);
4561
4562         kfree(iter->temp);
4563         kfree(iter->buffer_iter);
4564         mutex_destroy(&iter->mutex);
4565         free_cpumask_var(iter->started);
4566 }
4567
4568 static struct trace_iterator *
4569 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4570 {
4571         struct trace_array *tr = inode->i_private;
4572         struct trace_iterator *iter;
4573         int cpu;
4574
4575         if (tracing_disabled)
4576                 return ERR_PTR(-ENODEV);
4577
4578         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4579         if (!iter)
4580                 return ERR_PTR(-ENOMEM);
4581
4582         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4583                                     GFP_KERNEL);
4584         if (!iter->buffer_iter)
4585                 goto release;
4586
4587         /*
4588          * trace_find_next_entry() may need to save off iter->ent.
4589          * It will place it into the iter->temp buffer. As most
4590          * events are less than 128, allocate a buffer of that size.
4591          * If one is greater, then trace_find_next_entry() will
4592          * allocate a new buffer to adjust for the bigger iter->ent.
4593          * It's not critical if it fails to get allocated here.
4594          */
4595         iter->temp = kmalloc(128, GFP_KERNEL);
4596         if (iter->temp)
4597                 iter->temp_size = 128;
4598
4599         /*
4600          * trace_event_printf() may need to modify the given format
4601          * string to replace %p with %px so that it shows the real address
4602          * instead of a hash value. However, that is only needed for event
4603          * tracing; other tracers may not need it. Defer the allocation
4604          * until it is needed.
4605          */
4606         iter->fmt = NULL;
4607         iter->fmt_size = 0;
4608
4609         mutex_lock(&trace_types_lock);
4610         iter->trace = tr->current_trace;
4611
4612         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4613                 goto fail;
4614
4615         iter->tr = tr;
4616
4617 #ifdef CONFIG_TRACER_MAX_TRACE
4618         /* Currently only the top directory has a snapshot */
4619         if (tr->current_trace->print_max || snapshot)
4620                 iter->array_buffer = &tr->max_buffer;
4621         else
4622 #endif
4623                 iter->array_buffer = &tr->array_buffer;
4624         iter->snapshot = snapshot;
4625         iter->pos = -1;
4626         iter->cpu_file = tracing_get_cpu(inode);
4627         mutex_init(&iter->mutex);
4628
4629         /* Notify the tracer early; before we stop tracing. */
4630         if (iter->trace->open)
4631                 iter->trace->open(iter);
4632
4633         /* Annotate start of buffers if we had overruns */
4634         if (ring_buffer_overruns(iter->array_buffer->buffer))
4635                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4636
4637         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4638         if (trace_clocks[tr->clock_id].in_ns)
4639                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4640
4641         /*
4642          * If pause-on-trace is enabled, then stop the trace while
4643          * dumping, unless this is the "snapshot" file
4644          */
4645         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4646                 tracing_stop_tr(tr);
4647
4648         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4649                 for_each_tracing_cpu(cpu) {
4650                         iter->buffer_iter[cpu] =
4651                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4652                                                          cpu, GFP_KERNEL);
4653                 }
4654                 ring_buffer_read_prepare_sync();
4655                 for_each_tracing_cpu(cpu) {
4656                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4657                         tracing_iter_reset(iter, cpu);
4658                 }
4659         } else {
4660                 cpu = iter->cpu_file;
4661                 iter->buffer_iter[cpu] =
4662                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4663                                                  cpu, GFP_KERNEL);
4664                 ring_buffer_read_prepare_sync();
4665                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4666                 tracing_iter_reset(iter, cpu);
4667         }
4668
4669         mutex_unlock(&trace_types_lock);
4670
4671         return iter;
4672
4673  fail:
4674         mutex_unlock(&trace_types_lock);
4675         free_trace_iter_content(iter);
4676 release:
4677         seq_release_private(inode, file);
4678         return ERR_PTR(-ENOMEM);
4679 }
4680
4681 int tracing_open_generic(struct inode *inode, struct file *filp)
4682 {
4683         int ret;
4684
4685         ret = tracing_check_open_get_tr(NULL);
4686         if (ret)
4687                 return ret;
4688
4689         filp->private_data = inode->i_private;
4690         return 0;
4691 }
4692
4693 bool tracing_is_disabled(void)
4694 {
4695         return (tracing_disabled) ? true : false;
4696 }
4697
4698 /*
4699  * Open and update trace_array ref count.
4700  * Must have the current trace_array passed to it.
4701  */
4702 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4703 {
4704         struct trace_array *tr = inode->i_private;
4705         int ret;
4706
4707         ret = tracing_check_open_get_tr(tr);
4708         if (ret)
4709                 return ret;
4710
4711         filp->private_data = inode->i_private;
4712
4713         return 0;
4714 }
4715
4716 /*
4717  * The private pointer of the inode is the trace_event_file.
4718  * Update the tr ref count associated to it.
4719  */
4720 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4721 {
4722         struct trace_event_file *file = inode->i_private;
4723         int ret;
4724
4725         ret = tracing_check_open_get_tr(file->tr);
4726         if (ret)
4727                 return ret;
4728
4729         mutex_lock(&event_mutex);
4730
4731         /* Fail if the file is marked for removal */
4732         if (file->flags & EVENT_FILE_FL_FREED) {
4733                 trace_array_put(file->tr);
4734                 ret = -ENODEV;
4735         } else {
4736                 event_file_get(file);
4737         }
4738
4739         mutex_unlock(&event_mutex);
4740         if (ret)
4741                 return ret;
4742
4743         filp->private_data = inode->i_private;
4744
4745         return 0;
4746 }
4747
4748 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4749 {
4750         struct trace_event_file *file = inode->i_private;
4751
4752         trace_array_put(file->tr);
4753         event_file_put(file);
4754
4755         return 0;
4756 }
4757
4758 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4759 {
4760         tracing_release_file_tr(inode, filp);
4761         return single_release(inode, filp);
4762 }
4763
4764 static int tracing_mark_open(struct inode *inode, struct file *filp)
4765 {
4766         stream_open(inode, filp);
4767         return tracing_open_generic_tr(inode, filp);
4768 }
4769
4770 static int tracing_release(struct inode *inode, struct file *file)
4771 {
4772         struct trace_array *tr = inode->i_private;
4773         struct seq_file *m = file->private_data;
4774         struct trace_iterator *iter;
4775         int cpu;
4776
4777         if (!(file->f_mode & FMODE_READ)) {
4778                 trace_array_put(tr);
4779                 return 0;
4780         }
4781
4782         /* Writes do not use seq_file */
4783         iter = m->private;
4784         mutex_lock(&trace_types_lock);
4785
4786         for_each_tracing_cpu(cpu) {
4787                 if (iter->buffer_iter[cpu])
4788                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4789         }
4790
4791         if (iter->trace && iter->trace->close)
4792                 iter->trace->close(iter);
4793
4794         if (!iter->snapshot && tr->stop_count)
4795                 /* reenable tracing if it was previously enabled */
4796                 tracing_start_tr(tr);
4797
4798         __trace_array_put(tr);
4799
4800         mutex_unlock(&trace_types_lock);
4801
4802         free_trace_iter_content(iter);
4803         seq_release_private(inode, file);
4804
4805         return 0;
4806 }
4807
4808 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4809 {
4810         struct trace_array *tr = inode->i_private;
4811
4812         trace_array_put(tr);
4813         return 0;
4814 }
4815
4816 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4817 {
4818         struct trace_array *tr = inode->i_private;
4819
4820         trace_array_put(tr);
4821
4822         return single_release(inode, file);
4823 }
4824
4825 static int tracing_open(struct inode *inode, struct file *file)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct trace_iterator *iter;
4829         int ret;
4830
4831         ret = tracing_check_open_get_tr(tr);
4832         if (ret)
4833                 return ret;
4834
4835         /* If this file was open for write, then erase contents */
4836         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4837                 int cpu = tracing_get_cpu(inode);
4838                 struct array_buffer *trace_buf = &tr->array_buffer;
4839
4840 #ifdef CONFIG_TRACER_MAX_TRACE
4841                 if (tr->current_trace->print_max)
4842                         trace_buf = &tr->max_buffer;
4843 #endif
4844
4845                 if (cpu == RING_BUFFER_ALL_CPUS)
4846                         tracing_reset_online_cpus(trace_buf);
4847                 else
4848                         tracing_reset_cpu(trace_buf, cpu);
4849         }
4850
4851         if (file->f_mode & FMODE_READ) {
4852                 iter = __tracing_open(inode, file, false);
4853                 if (IS_ERR(iter))
4854                         ret = PTR_ERR(iter);
4855                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4856                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4857         }
4858
4859         if (ret < 0)
4860                 trace_array_put(tr);
4861
4862         return ret;
4863 }
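/*
 * Illustrative user-space view of the open semantics above (a sketch;
 * paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/trace     # FMODE_READ: iterate buffer contents
 *   # echo > /sys/kernel/tracing/trace  # write + O_TRUNC: clear the buffer
 *
 * Reads go through __tracing_open() and the seq_file iterator; a truncating
 * open resets either all online CPU buffers or just the CPU selected via a
 * per_cpu/cpuN/trace file.
 */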
4864
4865 /*
4866  * Some tracers are not suitable for instance buffers.
4867  * A tracer is always available for the global array (toplevel)
4868  * or if it explicitly states that it is.
4869  */
4870 static bool
4871 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4872 {
4873 #ifdef CONFIG_TRACER_SNAPSHOT
4874         /* arrays with mapped buffer range do not have snapshots */
4875         if (tr->range_addr_start && t->use_max_tr)
4876                 return false;
4877 #endif
4878         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4879 }
4880
4881 /* Find the next tracer that this trace array may use */
4882 static struct tracer *
4883 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4884 {
4885         while (t && !trace_ok_for_array(t, tr))
4886                 t = t->next;
4887
4888         return t;
4889 }
4890
4891 static void *
4892 t_next(struct seq_file *m, void *v, loff_t *pos)
4893 {
4894         struct trace_array *tr = m->private;
4895         struct tracer *t = v;
4896
4897         (*pos)++;
4898
4899         if (t)
4900                 t = get_tracer_for_array(tr, t->next);
4901
4902         return t;
4903 }
4904
4905 static void *t_start(struct seq_file *m, loff_t *pos)
4906 {
4907         struct trace_array *tr = m->private;
4908         struct tracer *t;
4909         loff_t l = 0;
4910
4911         mutex_lock(&trace_types_lock);
4912
4913         t = get_tracer_for_array(tr, trace_types);
4914         for (; t && l < *pos; t = t_next(m, t, &l))
4915                         ;
4916
4917         return t;
4918 }
4919
4920 static void t_stop(struct seq_file *m, void *p)
4921 {
4922         mutex_unlock(&trace_types_lock);
4923 }
4924
4925 static int t_show(struct seq_file *m, void *v)
4926 {
4927         struct tracer *t = v;
4928
4929         if (!t)
4930                 return 0;
4931
4932         seq_puts(m, t->name);
4933         if (t->next)
4934                 seq_putc(m, ' ');
4935         else
4936                 seq_putc(m, '\n');
4937
4938         return 0;
4939 }
4940
4941 static const struct seq_operations show_traces_seq_ops = {
4942         .start          = t_start,
4943         .next           = t_next,
4944         .stop           = t_stop,
4945         .show           = t_show,
4946 };
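/*
 * These seq operations back the "available_tracers" file: t_start()/t_next()
 * walk the registered tracer list, filtered through trace_ok_for_array(),
 * and t_show() prints the names space-separated on one line. Example output
 * (illustrative; it depends entirely on the kernel configuration):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 */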
4947
4948 static int show_traces_open(struct inode *inode, struct file *file)
4949 {
4950         struct trace_array *tr = inode->i_private;
4951         struct seq_file *m;
4952         int ret;
4953
4954         ret = tracing_check_open_get_tr(tr);
4955         if (ret)
4956                 return ret;
4957
4958         ret = seq_open(file, &show_traces_seq_ops);
4959         if (ret) {
4960                 trace_array_put(tr);
4961                 return ret;
4962         }
4963
4964         m = file->private_data;
4965         m->private = tr;
4966
4967         return 0;
4968 }
4969
4970 static int tracing_seq_release(struct inode *inode, struct file *file)
4971 {
4972         struct trace_array *tr = inode->i_private;
4973
4974         trace_array_put(tr);
4975         return seq_release(inode, file);
4976 }
4977
4978 static ssize_t
4979 tracing_write_stub(struct file *filp, const char __user *ubuf,
4980                    size_t count, loff_t *ppos)
4981 {
4982         return count;
4983 }
4984
4985 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4986 {
4987         int ret;
4988
4989         if (file->f_mode & FMODE_READ)
4990                 ret = seq_lseek(file, offset, whence);
4991         else
4992                 file->f_pos = ret = 0;
4993
4994         return ret;
4995 }
4996
4997 static const struct file_operations tracing_fops = {
4998         .open           = tracing_open,
4999         .read           = seq_read,
5000         .read_iter      = seq_read_iter,
5001         .splice_read    = copy_splice_read,
5002         .write          = tracing_write_stub,
5003         .llseek         = tracing_lseek,
5004         .release        = tracing_release,
5005 };
5006
5007 static const struct file_operations show_traces_fops = {
5008         .open           = show_traces_open,
5009         .read           = seq_read,
5010         .llseek         = seq_lseek,
5011         .release        = tracing_seq_release,
5012 };
5013
5014 static ssize_t
5015 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5016                      size_t count, loff_t *ppos)
5017 {
5018         struct trace_array *tr = file_inode(filp)->i_private;
5019         char *mask_str;
5020         int len;
5021
5022         len = snprintf(NULL, 0, "%*pb\n",
5023                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5024         mask_str = kmalloc(len, GFP_KERNEL);
5025         if (!mask_str)
5026                 return -ENOMEM;
5027
5028         len = snprintf(mask_str, len, "%*pb\n",
5029                        cpumask_pr_args(tr->tracing_cpumask));
5030         if (len >= count) {
5031                 count = -EINVAL;
5032                 goto out_err;
5033         }
5034         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5035
5036 out_err:
5037         kfree(mask_str);
5038
5039         return count;
5040 }
5041
5042 int tracing_set_cpumask(struct trace_array *tr,
5043                         cpumask_var_t tracing_cpumask_new)
5044 {
5045         int cpu;
5046
5047         if (!tr)
5048                 return -EINVAL;
5049
5050         local_irq_disable();
5051         arch_spin_lock(&tr->max_lock);
5052         for_each_tracing_cpu(cpu) {
5053                 /*
5054                  * Increase/decrease the disabled counter if we are
5055                  * about to flip a bit in the cpumask:
5056                  */
5057                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5058                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5059                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5060                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5061 #ifdef CONFIG_TRACER_MAX_TRACE
5062                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5063 #endif
5064                 }
5065                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5066                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5067                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5068                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5069 #ifdef CONFIG_TRACER_MAX_TRACE
5070                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5071 #endif
5072                 }
5073         }
5074         arch_spin_unlock(&tr->max_lock);
5075         local_irq_enable();
5076
5077         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5078
5079         return 0;
5080 }
5081
5082 static ssize_t
5083 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5084                       size_t count, loff_t *ppos)
5085 {
5086         struct trace_array *tr = file_inode(filp)->i_private;
5087         cpumask_var_t tracing_cpumask_new;
5088         int err;
5089
5090         if (count == 0 || count > KMALLOC_MAX_SIZE)
5091                 return -EINVAL;
5092
5093         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5094                 return -ENOMEM;
5095
5096         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5097         if (err)
5098                 goto err_free;
5099
5100         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5101         if (err)
5102                 goto err_free;
5103
5104         free_cpumask_var(tracing_cpumask_new);
5105
5106         return count;
5107
5108 err_free:
5109         free_cpumask_var(tracing_cpumask_new);
5110
5111         return err;
5112 }
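/*
 * Example of driving the two functions above through the "tracing_cpumask"
 * file (a sketch; cpumask_parse_user() takes a hex CPU mask, in the same
 * format as /proc/irq/<n>/smp_affinity):
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *
 * CPUs cleared from the mask have their per-CPU recording disabled via
 * ring_buffer_record_disable_cpu(); CPUs added back are re-enabled.
 */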
5113
5114 static const struct file_operations tracing_cpumask_fops = {
5115         .open           = tracing_open_generic_tr,
5116         .read           = tracing_cpumask_read,
5117         .write          = tracing_cpumask_write,
5118         .release        = tracing_release_generic_tr,
5119         .llseek         = generic_file_llseek,
5120 };
5121
5122 static int tracing_trace_options_show(struct seq_file *m, void *v)
5123 {
5124         struct tracer_opt *trace_opts;
5125         struct trace_array *tr = m->private;
5126         u32 tracer_flags;
5127         int i;
5128
5129         mutex_lock(&trace_types_lock);
5130         tracer_flags = tr->current_trace->flags->val;
5131         trace_opts = tr->current_trace->flags->opts;
5132
5133         for (i = 0; trace_options[i]; i++) {
5134                 if (tr->trace_flags & (1 << i))
5135                         seq_printf(m, "%s\n", trace_options[i]);
5136                 else
5137                         seq_printf(m, "no%s\n", trace_options[i]);
5138         }
5139
5140         for (i = 0; trace_opts[i].name; i++) {
5141                 if (tracer_flags & trace_opts[i].bit)
5142                         seq_printf(m, "%s\n", trace_opts[i].name);
5143                 else
5144                         seq_printf(m, "no%s\n", trace_opts[i].name);
5145         }
5146         mutex_unlock(&trace_types_lock);
5147
5148         return 0;
5149 }
5150
5151 static int __set_tracer_option(struct trace_array *tr,
5152                                struct tracer_flags *tracer_flags,
5153                                struct tracer_opt *opts, int neg)
5154 {
5155         struct tracer *trace = tracer_flags->trace;
5156         int ret;
5157
5158         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5159         if (ret)
5160                 return ret;
5161
5162         if (neg)
5163                 tracer_flags->val &= ~opts->bit;
5164         else
5165                 tracer_flags->val |= opts->bit;
5166         return 0;
5167 }
5168
5169 /* Try to assign a tracer specific option */
5170 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5171 {
5172         struct tracer *trace = tr->current_trace;
5173         struct tracer_flags *tracer_flags = trace->flags;
5174         struct tracer_opt *opts = NULL;
5175         int i;
5176
5177         for (i = 0; tracer_flags->opts[i].name; i++) {
5178                 opts = &tracer_flags->opts[i];
5179
5180                 if (strcmp(cmp, opts->name) == 0)
5181                         return __set_tracer_option(tr, trace->flags, opts, neg);
5182         }
5183
5184         return -EINVAL;
5185 }
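/*
 * Tracer-specific options resolved here share the "trace_options" file (and
 * the options/ directory) with the global flags. A sketch, assuming the
 * function tracer is current and exposes a "func_stack_trace" option:
 *
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # echo func_stack_trace > /sys/kernel/tracing/trace_options
 *   # echo nofunc_stack_trace > /sys/kernel/tracing/trace_options
 */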
5186
5187 /* Some tracers require overwrite to stay enabled */
5188 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5189 {
5190         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5191                 return -1;
5192
5193         return 0;
5194 }
5195
5196 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5197 {
5198         if ((mask == TRACE_ITER_RECORD_TGID) ||
5199             (mask == TRACE_ITER_RECORD_CMD) ||
5200             (mask == TRACE_ITER_TRACE_PRINTK))
5201                 lockdep_assert_held(&event_mutex);
5202
5203         /* do nothing if flag is already set */
5204         if (!!(tr->trace_flags & mask) == !!enabled)
5205                 return 0;
5206
5207         /* Give the tracer a chance to approve the change */
5208         if (tr->current_trace->flag_changed)
5209                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5210                         return -EINVAL;
5211
5212         if (mask == TRACE_ITER_TRACE_PRINTK) {
5213                 if (enabled) {
5214                         update_printk_trace(tr);
5215                 } else {
5216                         /*
5217                          * The global_trace cannot clear this.
5218                          * Its flag only gets cleared if another instance sets it.
5219                          */
5220                         if (printk_trace == &global_trace)
5221                                 return -EINVAL;
5222                         /*
5223                          * An instance must always have it set;
5224                          * by default, that's the global_trace instance.
5225                          */
5226                         if (printk_trace == tr)
5227                                 update_printk_trace(&global_trace);
5228                 }
5229         }
5230
5231         if (enabled)
5232                 tr->trace_flags |= mask;
5233         else
5234                 tr->trace_flags &= ~mask;
5235
5236         if (mask == TRACE_ITER_RECORD_CMD)
5237                 trace_event_enable_cmd_record(enabled);
5238
5239         if (mask == TRACE_ITER_RECORD_TGID) {
5240
5241                 if (trace_alloc_tgid_map() < 0) {
5242                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5243                         return -ENOMEM;
5244                 }
5245
5246                 trace_event_enable_tgid_record(enabled);
5247         }
5248
5249         if (mask == TRACE_ITER_EVENT_FORK)
5250                 trace_event_follow_fork(tr, enabled);
5251
5252         if (mask == TRACE_ITER_FUNC_FORK)
5253                 ftrace_pid_follow_fork(tr, enabled);
5254
5255         if (mask == TRACE_ITER_OVERWRITE) {
5256                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5257 #ifdef CONFIG_TRACER_MAX_TRACE
5258                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5259 #endif
5260         }
5261
5262         if (mask == TRACE_ITER_PRINTK) {
5263                 trace_printk_start_stop_comm(enabled);
5264                 trace_printk_control(enabled);
5265         }
5266
5267         return 0;
5268 }
5269
5270 int trace_set_options(struct trace_array *tr, char *option)
5271 {
5272         char *cmp;
5273         int neg = 0;
5274         int ret;
5275         size_t orig_len = strlen(option);
5276         int len;
5277
5278         cmp = strstrip(option);
5279
5280         len = str_has_prefix(cmp, "no");
5281         if (len)
5282                 neg = 1;
5283
5284         cmp += len;
5285
5286         mutex_lock(&event_mutex);
5287         mutex_lock(&trace_types_lock);
5288
5289         ret = match_string(trace_options, -1, cmp);
5290         /* If the option is not a global flag, try the tracer-specific options */
5291         if (ret < 0)
5292                 ret = set_tracer_option(tr, cmp, neg);
5293         else
5294                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5295
5296         mutex_unlock(&trace_types_lock);
5297         mutex_unlock(&event_mutex);
5298
5299         /*
5300          * If the first trailing whitespace is replaced with '\0' by strstrip,
5301          * turn it back into a space.
5302          */
5303         if (orig_len > strlen(option))
5304                 option[strlen(option)] = ' ';
5305
5306         return ret;
5307 }
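/*
 * Example of the option syntax parsed above (illustrative; option names come
 * from the trace_options[] table or the current tracer's own flags):
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options    # set a flag
 *   # echo nosym-offset > /sys/kernel/tracing/trace_options  # clear it again
 *
 * A leading "no" negates the option; names not found in the global table are
 * handed to set_tracer_option() for the current tracer to interpret.
 */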
5308
5309 static void __init apply_trace_boot_options(void)
5310 {
5311         char *buf = trace_boot_options_buf;
5312         char *option;
5313
5314         while (true) {
5315                 option = strsep(&buf, ",");
5316
5317                 if (!option)
5318                         break;
5319
5320                 if (*option)
5321                         trace_set_options(&global_trace, option);
5322
5323                 /* Put back the comma to allow this to be called again */
5324                 if (buf)
5325                         *(buf - 1) = ',';
5326         }
5327 }
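/*
 * The buffer parsed here is filled from the "trace_options=" kernel command
 * line parameter, a comma-separated list using the same syntax as the
 * trace_options file, e.g. (illustrative):
 *
 *   trace_options=sym-offset,noprintk
 *
 * strsep() NUL-terminates each option in place and the comma is put back
 * afterwards, so the buffer can be parsed again later if needed.
 */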
5328
5329 static ssize_t
5330 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5331                         size_t cnt, loff_t *ppos)
5332 {
5333         struct seq_file *m = filp->private_data;
5334         struct trace_array *tr = m->private;
5335         char buf[64];
5336         int ret;
5337
5338         if (cnt >= sizeof(buf))
5339                 return -EINVAL;
5340
5341         if (copy_from_user(buf, ubuf, cnt))
5342                 return -EFAULT;
5343
5344         buf[cnt] = 0;
5345
5346         ret = trace_set_options(tr, buf);
5347         if (ret < 0)
5348                 return ret;
5349
5350         *ppos += cnt;
5351
5352         return cnt;
5353 }
5354
5355 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5356 {
5357         struct trace_array *tr = inode->i_private;
5358         int ret;
5359
5360         ret = tracing_check_open_get_tr(tr);
5361         if (ret)
5362                 return ret;
5363
5364         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5365         if (ret < 0)
5366                 trace_array_put(tr);
5367
5368         return ret;
5369 }
5370
5371 static const struct file_operations tracing_iter_fops = {
5372         .open           = tracing_trace_options_open,
5373         .read           = seq_read,
5374         .llseek         = seq_lseek,
5375         .release        = tracing_single_release_tr,
5376         .write          = tracing_trace_options_write,
5377 };
5378
5379 static const char readme_msg[] =
5380         "tracing mini-HOWTO:\n\n"
5381         "By default tracefs removes all OTH file permission bits.\n"
5382         "When mounting tracefs an optional group id can be specified\n"
5383         "which adds the group to every directory and file in tracefs:\n\n"
5384         "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5385         "# echo 0 > tracing_on : quick way to disable tracing\n"
5386         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5387         " Important files:\n"
5388         "  trace\t\t\t- The static contents of the buffer\n"
5389         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5390         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5391         "  current_tracer\t- function and latency tracers\n"
5392         "  available_tracers\t- list of configured tracers for current_tracer\n"
5393         "  error_log\t- error log for failed commands (that support it)\n"
5394         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5395         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5396         "  trace_clock\t\t- change the clock used to order events\n"
5397         "       local:   Per cpu clock but may not be synced across CPUs\n"
5398         "      global:   Synced across CPUs but slows tracing down.\n"
5399         "     counter:   Not a clock, but just an increment\n"
5400         "      uptime:   Jiffy counter from time of boot\n"
5401         "        perf:   Same clock that perf events use\n"
5402 #ifdef CONFIG_X86_64
5403         "     x86-tsc:   TSC cycle counter\n"
5404 #endif
5405         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5406         "       delta:   Delta difference against a buffer-wide timestamp\n"
5407         "    absolute:   Absolute (standalone) timestamp\n"
5408         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5409         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5410         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5411         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5412         "\t\t\t  Remove sub-buffer with rmdir\n"
5413         "  trace_options\t\t- Set format or modify how tracing happens\n"
5414         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5415         "\t\t\t  option name\n"
5416         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5417 #ifdef CONFIG_DYNAMIC_FTRACE
5418         "\n  available_filter_functions - list of functions that can be filtered on\n"
5419         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5420         "\t\t\t  functions\n"
5421         "\t     accepts: func_full_name or glob-matching-pattern\n"
5422         "\t     modules: Can select a group via module\n"
5423         "\t      Format: :mod:<module-name>\n"
5424         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5425         "\t    triggers: a command to perform when function is hit\n"
5426         "\t      Format: <function>:<trigger>[:count]\n"
5427         "\t     trigger: traceon, traceoff\n"
5428         "\t\t      enable_event:<system>:<event>\n"
5429         "\t\t      disable_event:<system>:<event>\n"
5430 #ifdef CONFIG_STACKTRACE
5431         "\t\t      stacktrace\n"
5432 #endif
5433 #ifdef CONFIG_TRACER_SNAPSHOT
5434         "\t\t      snapshot\n"
5435 #endif
5436         "\t\t      dump\n"
5437         "\t\t      cpudump\n"
5438         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5439         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5440         "\t     The first one will disable tracing every time do_fault is hit\n"
5441         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5442         "\t       The first time do_trap is hit and it disables tracing, the\n"
5443         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5444         "\t       the counter will not decrement. It only decrements when the\n"
5445         "\t       trigger did work\n"
5446         "\t     To remove a trigger without a count:\n"
5447         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5448         "\t     To remove a trigger with a count:\n"
5449         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5450         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5451         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5452         "\t    modules: Can select a group via module command :mod:\n"
5453         "\t    Does not accept triggers\n"
5454 #endif /* CONFIG_DYNAMIC_FTRACE */
5455 #ifdef CONFIG_FUNCTION_TRACER
5456         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5457         "\t\t    (function)\n"
5458         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5459         "\t\t    (function)\n"
5460 #endif
5461 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5462         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5463         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5464         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5465 #endif
5466 #ifdef CONFIG_TRACER_SNAPSHOT
5467         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5468         "\t\t\t  snapshot buffer. Read the contents for more\n"
5469         "\t\t\t  information\n"
5470 #endif
5471 #ifdef CONFIG_STACK_TRACER
5472         "  stack_trace\t\t- Shows the max stack trace when active\n"
5473         "  stack_max_size\t- Shows current max stack size that was traced\n"
5474         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5475         "\t\t\t  new trace)\n"
5476 #ifdef CONFIG_DYNAMIC_FTRACE
5477         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5478         "\t\t\t  traces\n"
5479 #endif
5480 #endif /* CONFIG_STACK_TRACER */
5481 #ifdef CONFIG_DYNAMIC_EVENTS
5482         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5483         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5484 #endif
5485 #ifdef CONFIG_KPROBE_EVENTS
5486         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5487         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5488 #endif
5489 #ifdef CONFIG_UPROBE_EVENTS
5490         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5491         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5492 #endif
5493 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5494     defined(CONFIG_FPROBE_EVENTS)
5495         "\t  accepts: event-definitions (one definition per line)\n"
5496 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5497         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5498         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5499 #endif
5500 #ifdef CONFIG_FPROBE_EVENTS
5501         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5502         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5503 #endif
5504 #ifdef CONFIG_HIST_TRIGGERS
5505         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5506 #endif
5507         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5508         "\t           -:[<group>/][<event>]\n"
5509 #ifdef CONFIG_KPROBE_EVENTS
5510         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5511   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5512 #endif
5513 #ifdef CONFIG_UPROBE_EVENTS
5514   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5515 #endif
5516         "\t     args: <name>=fetcharg[:type]\n"
5517         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5518 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5519         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5520 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5521         "\t           <argname>[->field[->field|.field...]],\n"
5522 #endif
5523 #else
5524         "\t           $stack<index>, $stack, $retval, $comm,\n"
5525 #endif
5526         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5527         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5528         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5529         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5530         "\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5531 #ifdef CONFIG_HIST_TRIGGERS
5532         "\t    field: <stype> <name>;\n"
5533         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5534         "\t           [unsigned] char/int/long\n"
5535 #endif
5536         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5537         "\t            of the <attached-group>/<attached-event>.\n"
5538 #endif
5539         "  events/\t\t- Directory containing all trace event subsystems:\n"
5540         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5541         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5542         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5543         "\t\t\t  events\n"
5544         "      filter\t\t- If set, only events passing filter are traced\n"
5545         "  events/<system>/<event>/\t- Directory containing control files for\n"
5546         "\t\t\t  <event>:\n"
5547         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5548         "      filter\t\t- If set, only events passing filter are traced\n"
5549         "      trigger\t\t- If set, a command to perform when event is hit\n"
5550         "\t    Format: <trigger>[:count][if <filter>]\n"
5551         "\t   trigger: traceon, traceoff\n"
5552         "\t            enable_event:<system>:<event>\n"
5553         "\t            disable_event:<system>:<event>\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555         "\t            enable_hist:<system>:<event>\n"
5556         "\t            disable_hist:<system>:<event>\n"
5557 #endif
5558 #ifdef CONFIG_STACKTRACE
5559         "\t\t    stacktrace\n"
5560 #endif
5561 #ifdef CONFIG_TRACER_SNAPSHOT
5562         "\t\t    snapshot\n"
5563 #endif
5564 #ifdef CONFIG_HIST_TRIGGERS
5565         "\t\t    hist (see below)\n"
5566 #endif
5567         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5568         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5569         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5570         "\t                  events/block/block_unplug/trigger\n"
5571         "\t   The first disables tracing every time block_unplug is hit.\n"
5572         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5573         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5574         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5575         "\t   Like function triggers, the counter is only decremented if it\n"
5576         "\t    enabled or disabled tracing.\n"
5577         "\t   To remove a trigger without a count:\n"
5578         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5579         "\t   To remove a trigger with a count:\n"
5580         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5581         "\t   Filters can be ignored when removing a trigger.\n"
5582 #ifdef CONFIG_HIST_TRIGGERS
5583         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5584         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5585         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5586         "\t            [:values=<field1[,field2,...]>]\n"
5587         "\t            [:sort=<field1[,field2,...]>]\n"
5588         "\t            [:size=#entries]\n"
5589         "\t            [:pause][:continue][:clear]\n"
5590         "\t            [:name=histname1]\n"
5591         "\t            [:nohitcount]\n"
5592         "\t            [:<handler>.<action>]\n"
5593         "\t            [if <filter>]\n\n"
5594         "\t    Note, special fields can be used as well:\n"
5595         "\t            common_timestamp - to record current timestamp\n"
5596         "\t            common_cpu - to record the CPU the event happened on\n"
5597         "\n"
5598         "\t    A hist trigger variable can be:\n"
5599         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5600         "\t        - a reference to another variable e.g. y=$x,\n"
5601         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5602         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5603         "\n"
5604         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5605         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5606         "\t    variable reference, field or numeric literal.\n"
5607         "\n"
5608         "\t    When a matching event is hit, an entry is added to a hash\n"
5609         "\t    table using the key(s) and value(s) named, and the value of a\n"
5610         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5611         "\t    correspond to fields in the event's format description.  Keys\n"
5612         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5613         "\t    Compound keys consisting of up to two fields can be specified\n"
5614         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5615         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5616         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5617         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5618         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5619         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5620         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5621         "\t    its histogram data will be shared with other triggers of the\n"
5622         "\t    same name, and trigger hits will update this common data.\n\n"
5623         "\t    Reading the 'hist' file for the event will dump the hash\n"
5624         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5625         "\t    triggers attached to an event, there will be a table for each\n"
5626         "\t    trigger in the output.  The table displayed for a named\n"
5627         "\t    trigger will be the same as any other instance having the\n"
5628         "\t    same name.  The default format used to display a given field\n"
5629         "\t    can be modified by appending any of the following modifiers\n"
5630         "\t    to the field name, as applicable:\n\n"
5631         "\t            .hex        display a number as a hex value\n"
5632         "\t            .sym        display an address as a symbol\n"
5633         "\t            .sym-offset display an address as a symbol and offset\n"
5634         "\t            .execname   display a common_pid as a program name\n"
5635         "\t            .syscall    display a syscall id as a syscall name\n"
5636         "\t            .log2       display log2 value rather than raw number\n"
5637         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5638         "\t            .usecs      display a common_timestamp in microseconds\n"
5639         "\t            .percent    display a number as a percentage value\n"
5640         "\t            .graph      display a bar-graph of a value\n\n"
5641         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5642         "\t    trigger or to start a hist trigger but not log any events\n"
5643         "\t    until told to do so.  'continue' can be used to start or\n"
5644         "\t    restart a paused hist trigger.\n\n"
5645         "\t    The 'clear' parameter will clear the contents of a running\n"
5646         "\t    hist trigger and leave its current paused/active state\n"
5647         "\t    unchanged.\n\n"
5648         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5649         "\t    raw hitcount in the histogram.\n\n"
5650         "\t    The enable_hist and disable_hist triggers can be used to\n"
5651         "\t    have one event conditionally start and stop another event's\n"
5652         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5653         "\t    the enable_event and disable_event triggers.\n\n"
5654         "\t    Hist trigger handlers and actions are executed whenever\n"
5655         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5656         "\t        <handler>.<action>\n\n"
5657         "\t    The available handlers are:\n\n"
5658         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5659         "\t        onmax(var)               - invoke if var exceeds current max\n"
5660         "\t        onchange(var)            - invoke action if var changes\n\n"
5661         "\t    The available actions are:\n\n"
5662         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5663         "\t        save(field,...)                      - save current event fields\n"
5664 #ifdef CONFIG_TRACER_SNAPSHOT
5665         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5666 #endif
5667 #ifdef CONFIG_SYNTH_EVENTS
5668         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5669         "\t  Write into this file to define/undefine new synthetic events.\n"
5670         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5671 #endif
5672 #endif
5673 ;
5674
5675 static ssize_t
5676 tracing_readme_read(struct file *filp, char __user *ubuf,
5677                        size_t cnt, loff_t *ppos)
5678 {
5679         return simple_read_from_buffer(ubuf, cnt, ppos,
5680                                         readme_msg, strlen(readme_msg));
5681 }
5682
5683 static const struct file_operations tracing_readme_fops = {
5684         .open           = tracing_open_generic,
5685         .read           = tracing_readme_read,
5686         .llseek         = generic_file_llseek,
5687 };
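/*
 * The text above is exposed read-only as the "README" file in tracefs,
 * e.g. (assuming the default mount point):
 *
 *   # cat /sys/kernel/tracing/README
 */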
5688
5689 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5690 static union trace_eval_map_item *
5691 update_eval_map(union trace_eval_map_item *ptr)
5692 {
5693         if (!ptr->map.eval_string) {
5694                 if (ptr->tail.next) {
5695                         ptr = ptr->tail.next;
5696                         /* Set ptr to the next real item (skip head) */
5697                         ptr++;
5698                 } else
5699                         return NULL;
5700         }
5701         return ptr;
5702 }
5703
5704 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5705 {
5706         union trace_eval_map_item *ptr = v;
5707
5708         /*
5709          * Paranoid! If ptr points to end, we don't want to increment past it.
5710          * This really should never happen.
5711          */
5712         (*pos)++;
5713         ptr = update_eval_map(ptr);
5714         if (WARN_ON_ONCE(!ptr))
5715                 return NULL;
5716
5717         ptr++;
5718         ptr = update_eval_map(ptr);
5719
5720         return ptr;
5721 }
5722
5723 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5724 {
5725         union trace_eval_map_item *v;
5726         loff_t l = 0;
5727
5728         mutex_lock(&trace_eval_mutex);
5729
5730         v = trace_eval_maps;
5731         if (v)
5732                 v++;
5733
5734         while (v && l < *pos) {
5735                 v = eval_map_next(m, v, &l);
5736         }
5737
5738         return v;
5739 }
5740
5741 static void eval_map_stop(struct seq_file *m, void *v)
5742 {
5743         mutex_unlock(&trace_eval_mutex);
5744 }
5745
5746 static int eval_map_show(struct seq_file *m, void *v)
5747 {
5748         union trace_eval_map_item *ptr = v;
5749
5750         seq_printf(m, "%s %ld (%s)\n",
5751                    ptr->map.eval_string, ptr->map.eval_value,
5752                    ptr->map.system);
5753
5754         return 0;
5755 }
5756
5757 static const struct seq_operations tracing_eval_map_seq_ops = {
5758         .start          = eval_map_start,
5759         .next           = eval_map_next,
5760         .stop           = eval_map_stop,
5761         .show           = eval_map_show,
5762 };
5763
5764 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5765 {
5766         int ret;
5767
5768         ret = tracing_check_open_get_tr(NULL);
5769         if (ret)
5770                 return ret;
5771
5772         return seq_open(filp, &tracing_eval_map_seq_ops);
5773 }
5774
5775 static const struct file_operations tracing_eval_map_fops = {
5776         .open           = tracing_eval_map_open,
5777         .read           = seq_read,
5778         .llseek         = seq_lseek,
5779         .release        = seq_release,
5780 };
5781
5782 static inline union trace_eval_map_item *
5783 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5784 {
5785         /* Return tail of array given the head */
5786         return ptr + ptr->head.length + 1;
5787 }
5788
5789 static void
5790 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5791                            int len)
5792 {
5793         struct trace_eval_map **stop;
5794         struct trace_eval_map **map;
5795         union trace_eval_map_item *map_array;
5796         union trace_eval_map_item *ptr;
5797
5798         stop = start + len;
5799
5800         /*
5801          * The trace_eval_maps contains the map plus a head and tail item,
5802          * where the head holds the module and length of array, and the
5803          * tail holds a pointer to the next list.
5804          */
5805         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5806         if (!map_array) {
5807                 pr_warn("Unable to allocate trace eval mapping\n");
5808                 return;
5809         }
5810
5811         mutex_lock(&trace_eval_mutex);
5812
5813         if (!trace_eval_maps)
5814                 trace_eval_maps = map_array;
5815         else {
5816                 ptr = trace_eval_maps;
5817                 for (;;) {
5818                         ptr = trace_eval_jmp_to_tail(ptr);
5819                         if (!ptr->tail.next)
5820                                 break;
5821                         ptr = ptr->tail.next;
5822
5823                 }
5824                 ptr->tail.next = map_array;
5825         }
5826         map_array->head.mod = mod;
5827         map_array->head.length = len;
5828         map_array++;
5829
5830         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5831                 map_array->map = **map;
5832                 map_array++;
5833         }
5834         memset(map_array, 0, sizeof(*map_array));
5835
5836         mutex_unlock(&trace_eval_mutex);
5837 }
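/*
 * Resulting layout of one map_array block built above (len + 2 items):
 *
 *   [0]          head: { .mod = mod, .length = len }
 *   [1 .. len]   map:  copies of the module's trace_eval_map entries
 *   [len + 1]    tail: zeroed; .tail.next links to the next block (or NULL)
 *
 * trace_eval_jmp_to_tail() relies on exactly this layout to hop from a
 * block's head to its tail when chaining a new block onto trace_eval_maps.
 */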
5838
5839 static void trace_create_eval_file(struct dentry *d_tracer)
5840 {
5841         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5842                           NULL, &tracing_eval_map_fops);
5843 }
5844
5845 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5846 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5847 static inline void trace_insert_eval_map_file(struct module *mod,
5848                               struct trace_eval_map **start, int len) { }
5849 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5850
5851 static void trace_insert_eval_map(struct module *mod,
5852                                   struct trace_eval_map **start, int len)
5853 {
5854         struct trace_eval_map **map;
5855
5856         if (len <= 0)
5857                 return;
5858
5859         map = start;
5860
5861         trace_event_eval_update(map, len);
5862
5863         trace_insert_eval_map_file(mod, start, len);
5864 }
5865
5866 static ssize_t
5867 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5868                        size_t cnt, loff_t *ppos)
5869 {
5870         struct trace_array *tr = filp->private_data;
5871         char buf[MAX_TRACER_SIZE+2];
5872         int r;
5873
5874         mutex_lock(&trace_types_lock);
5875         r = sprintf(buf, "%s\n", tr->current_trace->name);
5876         mutex_unlock(&trace_types_lock);
5877
5878         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5879 }
5880
5881 int tracer_init(struct tracer *t, struct trace_array *tr)
5882 {
5883         tracing_reset_online_cpus(&tr->array_buffer);
5884         return t->init(tr);
5885 }
5886
5887 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5888 {
5889         int cpu;
5890
5891         for_each_tracing_cpu(cpu)
5892                 per_cpu_ptr(buf->data, cpu)->entries = val;
5893 }
5894
5895 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5896 {
5897         if (cpu == RING_BUFFER_ALL_CPUS) {
5898                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5899         } else {
5900                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5901         }
5902 }
5903
5904 #ifdef CONFIG_TRACER_MAX_TRACE
5905 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5906 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5907                                         struct array_buffer *size_buf, int cpu_id)
5908 {
5909         int cpu, ret = 0;
5910
5911         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5912                 for_each_tracing_cpu(cpu) {
5913                         ret = ring_buffer_resize(trace_buf->buffer,
5914                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5915                         if (ret < 0)
5916                                 break;
5917                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5918                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5919                 }
5920         } else {
5921                 ret = ring_buffer_resize(trace_buf->buffer,
5922                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5923                 if (ret == 0)
5924                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5925                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5926         }
5927
5928         return ret;
5929 }
5930 #endif /* CONFIG_TRACER_MAX_TRACE */
5931
5932 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5933                                         unsigned long size, int cpu)
5934 {
5935         int ret;
5936
5937         /*
5938          * If kernel or user changes the size of the ring buffer
5939          * we use the size that was given, and we can forget about
5940          * expanding it later.
5941          */
5942         trace_set_ring_buffer_expanded(tr);
5943
5944         /* May be called before buffers are initialized */
5945         if (!tr->array_buffer.buffer)
5946                 return 0;
5947
5948         /* Do not allow tracing while resizing ring buffer */
5949         tracing_stop_tr(tr);
5950
5951         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5952         if (ret < 0)
5953                 goto out_start;
5954
5955 #ifdef CONFIG_TRACER_MAX_TRACE
5956         if (!tr->allocated_snapshot)
5957                 goto out;
5958
5959         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5960         if (ret < 0) {
5961                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5962                                                      &tr->array_buffer, cpu);
5963                 if (r < 0) {
5964                         /*
5965                          * AARGH! We are left with different
5966                          * size max buffer!!!!
5967                          * The max buffer is our "snapshot" buffer.
5968                          * When a tracer needs a snapshot (one of the
5969                          * latency tracers), it swaps the max buffer
5970                          * with the saved snapshot. We succeeded in
5971                          * updating the size of the main buffer, but failed to
5972                          * update the size of the max buffer. But when we tried
5973                          * to reset the main buffer to the original size, we
5974                          * failed there too. This is very unlikely to
5975                          * happen, but if it does, warn and kill all
5976                          * tracing.
5977                          */
5978                         WARN_ON(1);
5979                         tracing_disabled = 1;
5980                 }
5981                 goto out_start;
5982         }
5983
5984         update_buffer_entries(&tr->max_buffer, cpu);
5985
5986  out:
5987 #endif /* CONFIG_TRACER_MAX_TRACE */
5988
5989         update_buffer_entries(&tr->array_buffer, cpu);
5990  out_start:
5991         tracing_start_tr(tr);
5992         return ret;
5993 }
5994
5995 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5996                                   unsigned long size, int cpu_id)
5997 {
5998         int ret;
5999
6000         mutex_lock(&trace_types_lock);
6001
6002         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6003                 /* make sure this CPU is enabled in the mask */
6004                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6005                         ret = -EINVAL;
6006                         goto out;
6007                 }
6008         }
6009
6010         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6011         if (ret < 0)
6012                 ret = -ENOMEM;
6013
6014 out:
6015         mutex_unlock(&trace_types_lock);
6016
6017         return ret;
6018 }
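/*
 * This services writes to the buffer size files; sizes are given by the
 * user in kilobytes, e.g. (a sketch assuming the default tracefs mount):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb               # all CPUs
 *   # echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb  # CPU 1 only
 *
 * Any explicit resize also marks the buffer as expanded, so the deferred
 * boot-time expansion in tracing_update_buffers() is skipped afterwards.
 */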
6019
6020 static void update_last_data(struct trace_array *tr)
6021 {
6022         if (!tr->text_delta && !tr->data_delta)
6023                 return;
6024
6025         /*
6026          * Need to clear all CPU buffers, as events from the previous
6027          * boot must not be mixed with events from this boot or the
6028          * trace would be confusing. Clear the buffers of every CPU,
6029          * even those that may currently be offline.
6030          */
6031         tracing_reset_all_cpus(&tr->array_buffer);
6032
6033         /* Using current data now */
6034         tr->text_delta = 0;
6035         tr->data_delta = 0;
6036 }
6037
6038 /**
6039  * tracing_update_buffers - used by tracing facility to expand ring buffers
6040  * @tr: The tracing instance
6041  *
6042  * To save memory on a system that has tracing configured in but never
6043  * uses it, the ring buffers are initially set to a minimum size. Once
6044  * a user starts to use the tracing facility, they need to grow
6045  * to their default size.
6046  *
6047  * This function is to be called when a tracer is about to be used.
6048  */
6049 int tracing_update_buffers(struct trace_array *tr)
6050 {
6051         int ret = 0;
6052
6053         mutex_lock(&trace_types_lock);
6054
6055         update_last_data(tr);
6056
6057         if (!tr->ring_buffer_expanded)
6058                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6059                                                 RING_BUFFER_ALL_CPUS);
6060         mutex_unlock(&trace_types_lock);
6061
6062         return ret;
6063 }
6064
6065 struct trace_option_dentry;
6066
6067 static void
6068 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6069
6070 /*
6071  * Used to clear out the tracer before deletion of an instance.
6072  * Must have trace_types_lock held.
6073  */
6074 static void tracing_set_nop(struct trace_array *tr)
6075 {
6076         if (tr->current_trace == &nop_trace)
6077                 return;
6078
6079         tr->current_trace->enabled--;
6080
6081         if (tr->current_trace->reset)
6082                 tr->current_trace->reset(tr);
6083
6084         tr->current_trace = &nop_trace;
6085 }
6086
6087 static bool tracer_options_updated;
6088
6089 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6090 {
6091         /* Only enable if the directory has been created already. */
6092         if (!tr->dir)
6093                 return;
6094
6095         /* Only create trace option files after update_tracer_options finishes */
6096         if (!tracer_options_updated)
6097                 return;
6098
6099         create_trace_option_files(tr, t);
6100 }
6101
6102 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6103 {
6104         struct tracer *t;
6105 #ifdef CONFIG_TRACER_MAX_TRACE
6106         bool had_max_tr;
6107 #endif
6108         int ret = 0;
6109
6110         mutex_lock(&trace_types_lock);
6111
6112         update_last_data(tr);
6113
6114         if (!tr->ring_buffer_expanded) {
6115                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6116                                                 RING_BUFFER_ALL_CPUS);
6117                 if (ret < 0)
6118                         goto out;
6119                 ret = 0;
6120         }
6121
6122         for (t = trace_types; t; t = t->next) {
6123                 if (strcmp(t->name, buf) == 0)
6124                         break;
6125         }
6126         if (!t) {
6127                 ret = -EINVAL;
6128                 goto out;
6129         }
6130         if (t == tr->current_trace)
6131                 goto out;
6132
6133 #ifdef CONFIG_TRACER_SNAPSHOT
6134         if (t->use_max_tr) {
6135                 local_irq_disable();
6136                 arch_spin_lock(&tr->max_lock);
6137                 if (tr->cond_snapshot)
6138                         ret = -EBUSY;
6139                 arch_spin_unlock(&tr->max_lock);
6140                 local_irq_enable();
6141                 if (ret)
6142                         goto out;
6143         }
6144 #endif
6145         /* Some tracers won't work if enabled from the kernel command line */
6146         if (system_state < SYSTEM_RUNNING && t->noboot) {
6147                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6148                         t->name);
6149                 goto out;
6150         }
6151
6152         /* Some tracers are only allowed for the top level buffer */
6153         if (!trace_ok_for_array(t, tr)) {
6154                 ret = -EINVAL;
6155                 goto out;
6156         }
6157
6158         /* If trace pipe files are being read, we can't change the tracer */
6159         if (tr->trace_ref) {
6160                 ret = -EBUSY;
6161                 goto out;
6162         }
6163
6164         trace_branch_disable();
6165
6166         tr->current_trace->enabled--;
6167
6168         if (tr->current_trace->reset)
6169                 tr->current_trace->reset(tr);
6170
6171 #ifdef CONFIG_TRACER_MAX_TRACE
6172         had_max_tr = tr->current_trace->use_max_tr;
6173
6174         /* Current trace needs to be nop_trace before synchronize_rcu */
6175         tr->current_trace = &nop_trace;
6176
6177         if (had_max_tr && !t->use_max_tr) {
6178                 /*
6179                  * We need to make sure that update_max_tr() sees that
6180                  * current_trace changed to nop_trace, to keep it from
6181                  * swapping the buffers after we resize them.
6182                  * update_max_tr() is called with interrupts disabled,
6183                  * so a synchronize_rcu() is sufficient.
6184                  */
6185                 synchronize_rcu();
6186                 free_snapshot(tr);
6187                 tracing_disarm_snapshot(tr);
6188         }
6189
6190         if (!had_max_tr && t->use_max_tr) {
6191                 ret = tracing_arm_snapshot_locked(tr);
6192                 if (ret)
6193                         goto out;
6194         }
6195 #else
6196         tr->current_trace = &nop_trace;
6197 #endif
6198
6199         if (t->init) {
6200                 ret = tracer_init(t, tr);
6201                 if (ret) {
6202 #ifdef CONFIG_TRACER_MAX_TRACE
6203                         if (t->use_max_tr)
6204                                 tracing_disarm_snapshot(tr);
6205 #endif
6206                         goto out;
6207                 }
6208         }
6209
6210         tr->current_trace = t;
6211         tr->current_trace->enabled++;
6212         trace_branch_enable(tr);
6213  out:
6214         mutex_unlock(&trace_types_lock);
6215
6216         return ret;
6217 }
6218
6219 static ssize_t
6220 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6221                         size_t cnt, loff_t *ppos)
6222 {
6223         struct trace_array *tr = filp->private_data;
6224         char buf[MAX_TRACER_SIZE+1];
6225         char *name;
6226         size_t ret;
6227         int err;
6228
6229         ret = cnt;
6230
6231         if (cnt > MAX_TRACER_SIZE)
6232                 cnt = MAX_TRACER_SIZE;
6233
6234         if (copy_from_user(buf, ubuf, cnt))
6235                 return -EFAULT;
6236
6237         buf[cnt] = 0;
6238
6239         name = strim(buf);
6240
6241         err = tracing_set_tracer(tr, name);
6242         if (err)
6243                 return err;
6244
6245         *ppos += ret;
6246
6247         return ret;
6248 }
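
/*
 * Example (illustrative): this handler backs the current_tracer file,
 * so switching tracers from user space is simply (assuming the tracers
 * are configured in and tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *
 * An unknown tracer name makes tracing_set_tracer() fail with -EINVAL.
 */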
6249
6250 static ssize_t
6251 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6252                    size_t cnt, loff_t *ppos)
6253 {
6254         char buf[64];
6255         int r;
6256
6257         r = snprintf(buf, sizeof(buf), "%ld\n",
6258                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6259         if (r > sizeof(buf))
6260                 r = sizeof(buf);
6261         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6262 }
6263
6264 static ssize_t
6265 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6266                     size_t cnt, loff_t *ppos)
6267 {
6268         unsigned long val;
6269         int ret;
6270
6271         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6272         if (ret)
6273                 return ret;
6274
6275         *ptr = val * 1000;
6276
6277         return cnt;
6278 }
6279
6280 static ssize_t
6281 tracing_thresh_read(struct file *filp, char __user *ubuf,
6282                     size_t cnt, loff_t *ppos)
6283 {
6284         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6285 }
6286
6287 static ssize_t
6288 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6289                      size_t cnt, loff_t *ppos)
6290 {
6291         struct trace_array *tr = filp->private_data;
6292         int ret;
6293
6294         mutex_lock(&trace_types_lock);
6295         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6296         if (ret < 0)
6297                 goto out;
6298
6299         if (tr->current_trace->update_thresh) {
6300                 ret = tr->current_trace->update_thresh(tr);
6301                 if (ret < 0)
6302                         goto out;
6303         }
6304
6305         ret = cnt;
6306 out:
6307         mutex_unlock(&trace_types_lock);
6308
6309         return ret;
6310 }
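
/*
 * Example (illustrative): tracing_thresh is written and read in
 * microseconds, while the internal value is kept in nanoseconds
 * (note the val * 1000 in tracing_nsecs_write() above):
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh   # 100 usecs
 *   # cat /sys/kernel/tracing/tracing_thresh
 *   100
 */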
6311
6312 #ifdef CONFIG_TRACER_MAX_TRACE
6313
6314 static ssize_t
6315 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6316                      size_t cnt, loff_t *ppos)
6317 {
6318         struct trace_array *tr = filp->private_data;
6319
6320         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6321 }
6322
6323 static ssize_t
6324 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6325                       size_t cnt, loff_t *ppos)
6326 {
6327         struct trace_array *tr = filp->private_data;
6328
6329         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6330 }
6331
6332 #endif
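
/*
 * Example (illustrative): with a latency tracer such as irqsoff or
 * wakeup configured in, the recorded maximum can be inspected and
 * reset through tracing_max_latency (shown in microseconds):
 *
 *   # cat /sys/kernel/tracing/tracing_max_latency
 *   # echo 0 > /sys/kernel/tracing/tracing_max_latency
 */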
6333
6334 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6335 {
6336         if (cpu == RING_BUFFER_ALL_CPUS) {
6337                 if (cpumask_empty(tr->pipe_cpumask)) {
6338                         cpumask_setall(tr->pipe_cpumask);
6339                         return 0;
6340                 }
6341         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6342                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6343                 return 0;
6344         }
6345         return -EBUSY;
6346 }
6347
6348 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6349 {
6350         if (cpu == RING_BUFFER_ALL_CPUS) {
6351                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6352                 cpumask_clear(tr->pipe_cpumask);
6353         } else {
6354                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6355                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6356         }
6357 }
6358
6359 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6360 {
6361         struct trace_array *tr = inode->i_private;
6362         struct trace_iterator *iter;
6363         int cpu;
6364         int ret;
6365
6366         ret = tracing_check_open_get_tr(tr);
6367         if (ret)
6368                 return ret;
6369
6370         mutex_lock(&trace_types_lock);
6371         cpu = tracing_get_cpu(inode);
6372         ret = open_pipe_on_cpu(tr, cpu);
6373         if (ret)
6374                 goto fail_pipe_on_cpu;
6375
6376         /* create a buffer to store the information to pass to userspace */
6377         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6378         if (!iter) {
6379                 ret = -ENOMEM;
6380                 goto fail_alloc_iter;
6381         }
6382
6383         trace_seq_init(&iter->seq);
6384         iter->trace = tr->current_trace;
6385
6386         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6387                 ret = -ENOMEM;
6388                 goto fail;
6389         }
6390
6391         /* trace_pipe does not show the start of the buffer */
6392         cpumask_setall(iter->started);
6393
6394         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6395                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6396
6397         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6398         if (trace_clocks[tr->clock_id].in_ns)
6399                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6400
6401         iter->tr = tr;
6402         iter->array_buffer = &tr->array_buffer;
6403         iter->cpu_file = cpu;
6404         mutex_init(&iter->mutex);
6405         filp->private_data = iter;
6406
6407         if (iter->trace->pipe_open)
6408                 iter->trace->pipe_open(iter);
6409
6410         nonseekable_open(inode, filp);
6411
6412         tr->trace_ref++;
6413
6414         mutex_unlock(&trace_types_lock);
6415         return ret;
6416
6417 fail:
6418         kfree(iter);
6419 fail_alloc_iter:
6420         close_pipe_on_cpu(tr, cpu);
6421 fail_pipe_on_cpu:
6422         __trace_array_put(tr);
6423         mutex_unlock(&trace_types_lock);
6424         return ret;
6425 }
6426
6427 static int tracing_release_pipe(struct inode *inode, struct file *file)
6428 {
6429         struct trace_iterator *iter = file->private_data;
6430         struct trace_array *tr = inode->i_private;
6431
6432         mutex_lock(&trace_types_lock);
6433
6434         tr->trace_ref--;
6435
6436         if (iter->trace->pipe_close)
6437                 iter->trace->pipe_close(iter);
6438         close_pipe_on_cpu(tr, iter->cpu_file);
6439         mutex_unlock(&trace_types_lock);
6440
6441         free_trace_iter_content(iter);
6442         kfree(iter);
6443
6444         trace_array_put(tr);
6445
6446         return 0;
6447 }
6448
6449 static __poll_t
6450 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6451 {
6452         struct trace_array *tr = iter->tr;
6453
6454         /* Iterators are static; they should be either filled or empty */
6455         if (trace_buffer_iter(iter, iter->cpu_file))
6456                 return EPOLLIN | EPOLLRDNORM;
6457
6458         if (tr->trace_flags & TRACE_ITER_BLOCK)
6459                 /*
6460                  * Always select as readable when in blocking mode
6461                  */
6462                 return EPOLLIN | EPOLLRDNORM;
6463         else
6464                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6465                                              filp, poll_table, iter->tr->buffer_percent);
6466 }
6467
6468 static __poll_t
6469 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6470 {
6471         struct trace_iterator *iter = filp->private_data;
6472
6473         return trace_poll(iter, filp, poll_table);
6474 }
6475
6476 /* Must be called with iter->mutex held. */
6477 static int tracing_wait_pipe(struct file *filp)
6478 {
6479         struct trace_iterator *iter = filp->private_data;
6480         int ret;
6481
6482         while (trace_empty(iter)) {
6483
6484                 if ((filp->f_flags & O_NONBLOCK)) {
6485                         return -EAGAIN;
6486                 }
6487
6488                 /*
6489                  * We block until we have read something and tracing is
6490                  * disabled. We still block if tracing is disabled but we
6491                  * have never read anything. This allows a user to cat this
6492                  * file, and then enable tracing. But after we have read
6493                  * something, we give an EOF when tracing is disabled again.
6494                  *
6495                  * iter->pos will be 0 if we haven't read anything.
6496                  */
6497                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6498                         break;
6499
6500                 mutex_unlock(&iter->mutex);
6501
6502                 ret = wait_on_pipe(iter, 0);
6503
6504                 mutex_lock(&iter->mutex);
6505
6506                 if (ret)
6507                         return ret;
6508         }
6509
6510         return 1;
6511 }
6512
6513 /*
6514  * Consumer reader.
6515  */
6516 static ssize_t
6517 tracing_read_pipe(struct file *filp, char __user *ubuf,
6518                   size_t cnt, loff_t *ppos)
6519 {
6520         struct trace_iterator *iter = filp->private_data;
6521         ssize_t sret;
6522
6523         /*
6524          * Avoid more than one consumer on a single file descriptor.
6525          * This is just a matter of trace coherency; the ring buffer itself
6526          * is protected.
6527          */
6528         mutex_lock(&iter->mutex);
6529
6530         /* return any leftover data */
6531         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6532         if (sret != -EBUSY)
6533                 goto out;
6534
6535         trace_seq_init(&iter->seq);
6536
6537         if (iter->trace->read) {
6538                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6539                 if (sret)
6540                         goto out;
6541         }
6542
6543 waitagain:
6544         sret = tracing_wait_pipe(filp);
6545         if (sret <= 0)
6546                 goto out;
6547
6548         /* stop when tracing is finished */
6549         if (trace_empty(iter)) {
6550                 sret = 0;
6551                 goto out;
6552         }
6553
6554         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6555                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6556
6557         /* reset all but tr, trace, and overruns */
6558         trace_iterator_reset(iter);
6559         cpumask_clear(iter->started);
6560         trace_seq_init(&iter->seq);
6561
6562         trace_event_read_lock();
6563         trace_access_lock(iter->cpu_file);
6564         while (trace_find_next_entry_inc(iter) != NULL) {
6565                 enum print_line_t ret;
6566                 int save_len = iter->seq.seq.len;
6567
6568                 ret = print_trace_line(iter);
6569                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6570                         /*
6571                          * If one print_trace_line() fills the entire trace_seq in one shot,
6572                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6573                          * In this case, we need to consume it, otherwise the loop will peek
6574                          * at this event again next time, resulting in an infinite loop.
6575                          */
6576                         if (save_len == 0) {
6577                                 iter->seq.full = 0;
6578                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6579                                 trace_consume(iter);
6580                                 break;
6581                         }
6582
6583                         /* In other cases, don't print partial lines */
6584                         iter->seq.seq.len = save_len;
6585                         break;
6586                 }
6587                 if (ret != TRACE_TYPE_NO_CONSUME)
6588                         trace_consume(iter);
6589
6590                 if (trace_seq_used(&iter->seq) >= cnt)
6591                         break;
6592
6593                 /*
6594                  * Setting the full flag means we reached the trace_seq buffer
6595                  * size and should have left via the partial output condition above.
6596                  * One of the trace_seq_* functions is not being used properly.
6597                  */
6598                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6599                           iter->ent->type);
6600         }
6601         trace_access_unlock(iter->cpu_file);
6602         trace_event_read_unlock();
6603
6604         /* Now copy what we have to the user */
6605         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6606         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6607                 trace_seq_init(&iter->seq);
6608
6609         /*
6610          * If there was nothing to send to user, in spite of consuming trace
6611          * entries, go back to wait for more entries.
6612          */
6613         if (sret == -EBUSY)
6614                 goto waitagain;
6615
6616 out:
6617         mutex_unlock(&iter->mutex);
6618
6619         return sret;
6620 }
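
/*
 * Example (illustrative): trace_pipe is the consuming counterpart of
 * the trace file; events read through it are removed from the ring
 * buffer, and reads block until data is available:
 *
 *   # echo 1 > /sys/kernel/tracing/events/sched/sched_switch/enable
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * Only one consumer is allowed per CPU buffer at a time (see
 * open_pipe_on_cpu() above), so a second concurrent cat of the same
 * file fails with -EBUSY.
 */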
6621
6622 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6623                                      unsigned int idx)
6624 {
6625         __free_page(spd->pages[idx]);
6626 }
6627
6628 static size_t
6629 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6630 {
6631         size_t count;
6632         int save_len;
6633         int ret;
6634
6635         /* Seq buffer is page-sized, exactly what we need. */
6636         for (;;) {
6637                 save_len = iter->seq.seq.len;
6638                 ret = print_trace_line(iter);
6639
6640                 if (trace_seq_has_overflowed(&iter->seq)) {
6641                         iter->seq.seq.len = save_len;
6642                         break;
6643                 }
6644
6645                 /*
6646                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6647                  * should only be returned if iter->seq overflowed. But
6648                  * check it anyway to be safe.
6649                  */
6650                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6651                         iter->seq.seq.len = save_len;
6652                         break;
6653                 }
6654
6655                 count = trace_seq_used(&iter->seq) - save_len;
6656                 if (rem < count) {
6657                         rem = 0;
6658                         iter->seq.seq.len = save_len;
6659                         break;
6660                 }
6661
6662                 if (ret != TRACE_TYPE_NO_CONSUME)
6663                         trace_consume(iter);
6664                 rem -= count;
6665                 if (!trace_find_next_entry_inc(iter))   {
6666                         rem = 0;
6667                         iter->ent = NULL;
6668                         break;
6669                 }
6670         }
6671
6672         return rem;
6673 }
6674
6675 static ssize_t tracing_splice_read_pipe(struct file *filp,
6676                                         loff_t *ppos,
6677                                         struct pipe_inode_info *pipe,
6678                                         size_t len,
6679                                         unsigned int flags)
6680 {
6681         struct page *pages_def[PIPE_DEF_BUFFERS];
6682         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6683         struct trace_iterator *iter = filp->private_data;
6684         struct splice_pipe_desc spd = {
6685                 .pages          = pages_def,
6686                 .partial        = partial_def,
6687                 .nr_pages       = 0, /* This gets updated below. */
6688                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6689                 .ops            = &default_pipe_buf_ops,
6690                 .spd_release    = tracing_spd_release_pipe,
6691         };
6692         ssize_t ret;
6693         size_t rem;
6694         unsigned int i;
6695
6696         if (splice_grow_spd(pipe, &spd))
6697                 return -ENOMEM;
6698
6699         mutex_lock(&iter->mutex);
6700
6701         if (iter->trace->splice_read) {
6702                 ret = iter->trace->splice_read(iter, filp,
6703                                                ppos, pipe, len, flags);
6704                 if (ret)
6705                         goto out_err;
6706         }
6707
6708         ret = tracing_wait_pipe(filp);
6709         if (ret <= 0)
6710                 goto out_err;
6711
6712         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6713                 ret = -EFAULT;
6714                 goto out_err;
6715         }
6716
6717         trace_event_read_lock();
6718         trace_access_lock(iter->cpu_file);
6719
6720         /* Fill as many pages as possible. */
6721         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6722                 spd.pages[i] = alloc_page(GFP_KERNEL);
6723                 if (!spd.pages[i])
6724                         break;
6725
6726                 rem = tracing_fill_pipe_page(rem, iter);
6727
6728                 /* Copy the data into the page, so we can start over. */
6729                 ret = trace_seq_to_buffer(&iter->seq,
6730                                           page_address(spd.pages[i]),
6731                                           trace_seq_used(&iter->seq));
6732                 if (ret < 0) {
6733                         __free_page(spd.pages[i]);
6734                         break;
6735                 }
6736                 spd.partial[i].offset = 0;
6737                 spd.partial[i].len = trace_seq_used(&iter->seq);
6738
6739                 trace_seq_init(&iter->seq);
6740         }
6741
6742         trace_access_unlock(iter->cpu_file);
6743         trace_event_read_unlock();
6744         mutex_unlock(&iter->mutex);
6745
6746         spd.nr_pages = i;
6747
6748         if (i)
6749                 ret = splice_to_pipe(pipe, &spd);
6750         else
6751                 ret = 0;
6752 out:
6753         splice_shrink_spd(&spd);
6754         return ret;
6755
6756 out_err:
6757         mutex_unlock(&iter->mutex);
6758         goto out;
6759 }
6760
6761 static ssize_t
6762 tracing_entries_read(struct file *filp, char __user *ubuf,
6763                      size_t cnt, loff_t *ppos)
6764 {
6765         struct inode *inode = file_inode(filp);
6766         struct trace_array *tr = inode->i_private;
6767         int cpu = tracing_get_cpu(inode);
6768         char buf[64];
6769         int r = 0;
6770         ssize_t ret;
6771
6772         mutex_lock(&trace_types_lock);
6773
6774         if (cpu == RING_BUFFER_ALL_CPUS) {
6775                 int cpu, buf_size_same;
6776                 unsigned long size;
6777
6778                 size = 0;
6779                 buf_size_same = 1;
6780                 /* check if all cpu sizes are the same */
6781                 for_each_tracing_cpu(cpu) {
6782                         /* fill in the size from first enabled cpu */
6783                         if (size == 0)
6784                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6785                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6786                                 buf_size_same = 0;
6787                                 break;
6788                         }
6789                 }
6790
6791                 if (buf_size_same) {
6792                         if (!tr->ring_buffer_expanded)
6793                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6794                                             size >> 10,
6795                                             trace_buf_size >> 10);
6796                         else
6797                                 r = sprintf(buf, "%lu\n", size >> 10);
6798                 } else
6799                         r = sprintf(buf, "X\n");
6800         } else
6801                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6802
6803         mutex_unlock(&trace_types_lock);
6804
6805         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6806         return ret;
6807 }
6808
6809 static ssize_t
6810 tracing_entries_write(struct file *filp, const char __user *ubuf,
6811                       size_t cnt, loff_t *ppos)
6812 {
6813         struct inode *inode = file_inode(filp);
6814         struct trace_array *tr = inode->i_private;
6815         unsigned long val;
6816         int ret;
6817
6818         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6819         if (ret)
6820                 return ret;
6821
6822         /* must have at least 1 entry */
6823         if (!val)
6824                 return -EINVAL;
6825
6826         /* value is in KB */
6827         val <<= 10;
6828         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6829         if (ret < 0)
6830                 return ret;
6831
6832         *ppos += cnt;
6833
6834         return cnt;
6835 }
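
/*
 * Example (illustrative): buffer_size_kb accepts a per-CPU size in
 * kilobytes; the top level file resizes all CPUs, while the per_cpu
 * files resize a single CPU:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * Writing 0 is rejected with -EINVAL (at least one entry is required).
 */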
6836
6837 static ssize_t
6838 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6839                                 size_t cnt, loff_t *ppos)
6840 {
6841         struct trace_array *tr = filp->private_data;
6842         char buf[64];
6843         int r, cpu;
6844         unsigned long size = 0, expanded_size = 0;
6845
6846         mutex_lock(&trace_types_lock);
6847         for_each_tracing_cpu(cpu) {
6848                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6849                 if (!tr->ring_buffer_expanded)
6850                         expanded_size += trace_buf_size >> 10;
6851         }
6852         if (tr->ring_buffer_expanded)
6853                 r = sprintf(buf, "%lu\n", size);
6854         else
6855                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6856         mutex_unlock(&trace_types_lock);
6857
6858         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6859 }
6860
6861 static ssize_t
6862 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6863 {
6864         struct trace_array *tr = filp->private_data;
6865         struct seq_buf seq;
6866         char buf[64];
6867
6868         seq_buf_init(&seq, buf, 64);
6869
6870         seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6871         seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6872
6873         return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6874 }
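
/*
 * Example (illustrative): with a boot-mapped (persistent) ring buffer,
 * the file backed by this handler reports the address offsets (e.g.
 * from KASLR) needed to interpret addresses recorded by the previous
 * boot; the output has the form:
 *
 *   text delta:     <offset>
 *   data delta:     <offset>
 *
 * Both deltas read as 0 once the buffer has been reset for the
 * current boot (see update_last_data() above).
 */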
6875
6876 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6877 {
6878         struct trace_array *tr = inode->i_private;
6879         int cpu = tracing_get_cpu(inode);
6880         int ret;
6881
6882         ret = tracing_check_open_get_tr(tr);
6883         if (ret)
6884                 return ret;
6885
6886         ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6887         if (ret < 0)
6888                 __trace_array_put(tr);
6889         return ret;
6890 }
6891
6892 static ssize_t
6893 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6894                           size_t cnt, loff_t *ppos)
6895 {
6896         /*
6897          * There is no need to read what the user has written; this function
6898          * exists just to make sure that there is no error when "echo" is used.
6899          */
6900
6901         *ppos += cnt;
6902
6903         return cnt;
6904 }
6905
6906 static int
6907 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6908 {
6909         struct trace_array *tr = inode->i_private;
6910
6911         /* Disable tracing? */
6912         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6913                 tracer_tracing_off(tr);
6914         /* resize the ring buffer to 0 */
6915         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6916
6917         trace_array_put(tr);
6918
6919         return 0;
6920 }
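
/*
 * Example (illustrative): free_buffer releases the ring buffer memory
 * when the file is closed; the write itself is a no-op so that "echo"
 * works without error:
 *
 *   # echo 1 > /sys/kernel/tracing/free_buffer
 *
 * If the instance has TRACE_ITER_STOP_ON_FREE set, tracing is turned
 * off before the buffer is resized to 0.
 */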
6921
6922 #define TRACE_MARKER_MAX_SIZE           4096
6923
6924 static ssize_t
6925 tracing_mark_write(struct file *filp, const char __user *ubuf,
6926                                         size_t cnt, loff_t *fpos)
6927 {
6928         struct trace_array *tr = filp->private_data;
6929         struct ring_buffer_event *event;
6930         enum event_trigger_type tt = ETT_NONE;
6931         struct trace_buffer *buffer;
6932         struct print_entry *entry;
6933         int meta_size;
6934         ssize_t written;
6935         size_t size;
6936         int len;
6937
6938 /* Used in tracing_mark_raw_write() as well */
6939 #define FAULTED_STR "<faulted>"
6940 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6941
6942         if (tracing_disabled)
6943                 return -EINVAL;
6944
6945         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6946                 return -EINVAL;
6947
6948         if ((ssize_t)cnt < 0)
6949                 return -EINVAL;
6950
6951         if (cnt > TRACE_MARKER_MAX_SIZE)
6952                 cnt = TRACE_MARKER_MAX_SIZE;
6953
6954         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6955  again:
6956         size = cnt + meta_size;
6957
6958         /* If less than "<faulted>", then make sure we can still add that */
6959         if (cnt < FAULTED_SIZE)
6960                 size += FAULTED_SIZE - cnt;
6961
6962         buffer = tr->array_buffer.buffer;
6963         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6964                                             tracing_gen_ctx());
6965         if (unlikely(!event)) {
6966                 /*
6967                  * If the size was greater than what was allowed, then
6968                  * make it smaller and try again.
6969                  */
6970                 if (size > ring_buffer_max_event_size(buffer)) {
6971                         /* With cnt < FAULTED_SIZE, size should never be bigger than max */
6972                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6973                                 return -EBADF;
6974                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
6975                         /* The above should only happen once */
6976                         if (WARN_ON_ONCE(cnt + meta_size == size))
6977                                 return -EBADF;
6978                         goto again;
6979                 }
6980
6981                 /* Ring buffer disabled, return as if not open for write */
6982                 return -EBADF;
6983         }
6984
6985         entry = ring_buffer_event_data(event);
6986         entry->ip = _THIS_IP_;
6987
6988         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6989         if (len) {
6990                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6991                 cnt = FAULTED_SIZE;
6992                 written = -EFAULT;
6993         } else
6994                 written = cnt;
6995
6996         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6997                 /* do not add \n before testing triggers, but add \0 */
6998                 entry->buf[cnt] = '\0';
6999                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7000         }
7001
7002         if (entry->buf[cnt - 1] != '\n') {
7003                 entry->buf[cnt] = '\n';
7004                 entry->buf[cnt + 1] = '\0';
7005         } else
7006                 entry->buf[cnt] = '\0';
7007
7008         if (static_branch_unlikely(&trace_marker_exports_enabled))
7009                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7010         __buffer_unlock_commit(buffer, event);
7011
7012         if (tt)
7013                 event_triggers_post_call(tr->trace_marker_file, tt);
7014
7015         return written;
7016 }
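
/*
 * Example (illustrative): trace_marker lets user space inject text
 * into the trace stream, which is handy for correlating application
 * events with kernel events:
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_MARKER_MAX_SIZE are truncated, and a
 * newline is appended if one is not supplied.
 */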
7017
7018 static ssize_t
7019 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7020                                         size_t cnt, loff_t *fpos)
7021 {
7022         struct trace_array *tr = filp->private_data;
7023         struct ring_buffer_event *event;
7024         struct trace_buffer *buffer;
7025         struct raw_data_entry *entry;
7026         ssize_t written;
7027         int size;
7028         int len;
7029
7030 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7031
7032         if (tracing_disabled)
7033                 return -EINVAL;
7034
7035         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7036                 return -EINVAL;
7037
7038         /* The marker must at least have a tag id */
7039         if (cnt < sizeof(unsigned int))
7040                 return -EINVAL;
7041
7042         size = sizeof(*entry) + cnt;
7043         if (cnt < FAULT_SIZE_ID)
7044                 size += FAULT_SIZE_ID - cnt;
7045
7046         buffer = tr->array_buffer.buffer;
7047
7048         if (size > ring_buffer_max_event_size(buffer))
7049                 return -EINVAL;
7050
7051         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7052                                             tracing_gen_ctx());
7053         if (!event)
7054                 /* Ring buffer disabled, return as if not open for write */
7055                 return -EBADF;
7056
7057         entry = ring_buffer_event_data(event);
7058
7059         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7060         if (len) {
7061                 entry->id = -1;
7062                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7063                 written = -EFAULT;
7064         } else
7065                 written = cnt;
7066
7067         __buffer_unlock_commit(buffer, event);
7068
7069         return written;
7070 }
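
/*
 * Example (illustrative sketch): trace_marker_raw expects a binary
 * payload that starts with an integer tag id. A minimal user-space
 * writer could look like the following (names are only an example):
 *
 *	struct {
 *		int id;
 *		char payload[8];
 *	} rec = { .id = 42, .payload = "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes shorter than sizeof(unsigned int) are rejected with -EINVAL.
 */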
7071
7072 static int tracing_clock_show(struct seq_file *m, void *v)
7073 {
7074         struct trace_array *tr = m->private;
7075         int i;
7076
7077         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7078                 seq_printf(m,
7079                         "%s%s%s%s", i ? " " : "",
7080                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7081                         i == tr->clock_id ? "]" : "");
7082         seq_putc(m, '\n');
7083
7084         return 0;
7085 }
7086
7087 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7088 {
7089         int i;
7090
7091         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7092                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7093                         break;
7094         }
7095         if (i == ARRAY_SIZE(trace_clocks))
7096                 return -EINVAL;
7097
7098         mutex_lock(&trace_types_lock);
7099
7100         tr->clock_id = i;
7101
7102         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7103
7104         /*
7105          * New clock may not be consistent with the previous clock.
7106          * Reset the buffer so that it doesn't have incomparable timestamps.
7107          */
7108         tracing_reset_online_cpus(&tr->array_buffer);
7109
7110 #ifdef CONFIG_TRACER_MAX_TRACE
7111         if (tr->max_buffer.buffer)
7112                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7113         tracing_reset_online_cpus(&tr->max_buffer);
7114 #endif
7115
7116         mutex_unlock(&trace_types_lock);
7117
7118         return 0;
7119 }
7120
7121 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7122                                    size_t cnt, loff_t *fpos)
7123 {
7124         struct seq_file *m = filp->private_data;
7125         struct trace_array *tr = m->private;
7126         char buf[64];
7127         const char *clockstr;
7128         int ret;
7129
7130         if (cnt >= sizeof(buf))
7131                 return -EINVAL;
7132
7133         if (copy_from_user(buf, ubuf, cnt))
7134                 return -EFAULT;
7135
7136         buf[cnt] = 0;
7137
7138         clockstr = strstrip(buf);
7139
7140         ret = tracing_set_clock(tr, clockstr);
7141         if (ret)
7142                 return ret;
7143
7144         *fpos += cnt;
7145
7146         return cnt;
7147 }
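
/*
 * Example (illustrative): the available clocks and the one currently
 * selected are shown by trace_clock, and writing a name switches the
 * clock (which also resets the buffers, see tracing_set_clock()):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo global > /sys/kernel/tracing/trace_clock
 *
 * The exact list depends on the architecture and kernel version.
 */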
7148
7149 static int tracing_clock_open(struct inode *inode, struct file *file)
7150 {
7151         struct trace_array *tr = inode->i_private;
7152         int ret;
7153
7154         ret = tracing_check_open_get_tr(tr);
7155         if (ret)
7156                 return ret;
7157
7158         ret = single_open(file, tracing_clock_show, inode->i_private);
7159         if (ret < 0)
7160                 trace_array_put(tr);
7161
7162         return ret;
7163 }
7164
7165 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7166 {
7167         struct trace_array *tr = m->private;
7168
7169         mutex_lock(&trace_types_lock);
7170
7171         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7172                 seq_puts(m, "delta [absolute]\n");
7173         else
7174                 seq_puts(m, "[delta] absolute\n");
7175
7176         mutex_unlock(&trace_types_lock);
7177
7178         return 0;
7179 }
7180
7181 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7182 {
7183         struct trace_array *tr = inode->i_private;
7184         int ret;
7185
7186         ret = tracing_check_open_get_tr(tr);
7187         if (ret)
7188                 return ret;
7189
7190         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7191         if (ret < 0)
7192                 trace_array_put(tr);
7193
7194         return ret;
7195 }
7196
7197 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7198 {
7199         if (rbe == this_cpu_read(trace_buffered_event))
7200                 return ring_buffer_time_stamp(buffer);
7201
7202         return ring_buffer_event_time_stamp(buffer, rbe);
7203 }
7204
7205 /*
7206  * Enable or disable using the per CPU trace_buffered_event when possible.
7207  */
7208 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7209 {
7210         int ret = 0;
7211
7212         mutex_lock(&trace_types_lock);
7213
7214         if (set && tr->no_filter_buffering_ref++)
7215                 goto out;
7216
7217         if (!set) {
7218                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7219                         ret = -EINVAL;
7220                         goto out;
7221                 }
7222
7223                 --tr->no_filter_buffering_ref;
7224         }
7225  out:
7226         mutex_unlock(&trace_types_lock);
7227
7228         return ret;
7229 }
7230
7231 struct ftrace_buffer_info {
7232         struct trace_iterator   iter;
7233         void                    *spare;
7234         unsigned int            spare_cpu;
7235         unsigned int            spare_size;
7236         unsigned int            read;
7237 };
7238
7239 #ifdef CONFIG_TRACER_SNAPSHOT
7240 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7241 {
7242         struct trace_array *tr = inode->i_private;
7243         struct trace_iterator *iter;
7244         struct seq_file *m;
7245         int ret;
7246
7247         ret = tracing_check_open_get_tr(tr);
7248         if (ret)
7249                 return ret;
7250
7251         if (file->f_mode & FMODE_READ) {
7252                 iter = __tracing_open(inode, file, true);
7253                 if (IS_ERR(iter))
7254                         ret = PTR_ERR(iter);
7255         } else {
7256                 /* Writes still need the seq_file to hold the private data */
7257                 ret = -ENOMEM;
7258                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7259                 if (!m)
7260                         goto out;
7261                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7262                 if (!iter) {
7263                         kfree(m);
7264                         goto out;
7265                 }
7266                 ret = 0;
7267
7268                 iter->tr = tr;
7269                 iter->array_buffer = &tr->max_buffer;
7270                 iter->cpu_file = tracing_get_cpu(inode);
7271                 m->private = iter;
7272                 file->private_data = m;
7273         }
7274 out:
7275         if (ret < 0)
7276                 trace_array_put(tr);
7277
7278         return ret;
7279 }
7280
7281 static void tracing_swap_cpu_buffer(void *tr)
7282 {
7283         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7284 }
7285
7286 static ssize_t
7287 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7288                        loff_t *ppos)
7289 {
7290         struct seq_file *m = filp->private_data;
7291         struct trace_iterator *iter = m->private;
7292         struct trace_array *tr = iter->tr;
7293         unsigned long val;
7294         int ret;
7295
7296         ret = tracing_update_buffers(tr);
7297         if (ret < 0)
7298                 return ret;
7299
7300         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7301         if (ret)
7302                 return ret;
7303
7304         mutex_lock(&trace_types_lock);
7305
7306         if (tr->current_trace->use_max_tr) {
7307                 ret = -EBUSY;
7308                 goto out;
7309         }
7310
7311         local_irq_disable();
7312         arch_spin_lock(&tr->max_lock);
7313         if (tr->cond_snapshot)
7314                 ret = -EBUSY;
7315         arch_spin_unlock(&tr->max_lock);
7316         local_irq_enable();
7317         if (ret)
7318                 goto out;
7319
7320         switch (val) {
7321         case 0:
7322                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7323                         ret = -EINVAL;
7324                         break;
7325                 }
7326                 if (tr->allocated_snapshot)
7327                         free_snapshot(tr);
7328                 break;
7329         case 1:
7330 /* Only allow per-cpu swap if the ring buffer supports it */
7331 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7332                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7333                         ret = -EINVAL;
7334                         break;
7335                 }
7336 #endif
7337                 if (tr->allocated_snapshot)
7338                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7339                                         &tr->array_buffer, iter->cpu_file);
7340
7341                 ret = tracing_arm_snapshot_locked(tr);
7342                 if (ret)
7343                         break;
7344
7345                 /* Now, we're going to swap */
7346                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7347                         local_irq_disable();
7348                         update_max_tr(tr, current, smp_processor_id(), NULL);
7349                         local_irq_enable();
7350                 } else {
7351                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7352                                                  (void *)tr, 1);
7353                 }
7354                 tracing_disarm_snapshot(tr);
7355                 break;
7356         default:
7357                 if (tr->allocated_snapshot) {
7358                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7359                                 tracing_reset_online_cpus(&tr->max_buffer);
7360                         else
7361                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7362                 }
7363                 break;
7364         }
7365
7366         if (ret >= 0) {
7367                 *ppos += cnt;
7368                 ret = cnt;
7369         }
7370 out:
7371         mutex_unlock(&trace_types_lock);
7372         return ret;
7373 }
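
/*
 * Example (illustrative): the snapshot file accepts the values handled
 * in the switch statement above:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot    # allocate and swap buffers
 *   # cat /sys/kernel/tracing/snapshot         # read the saved snapshot
 *   # echo 2 > /sys/kernel/tracing/snapshot    # clear the snapshot buffer
 *   # echo 0 > /sys/kernel/tracing/snapshot    # free the snapshot buffer
 */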
7374
7375 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7376 {
7377         struct seq_file *m = file->private_data;
7378         int ret;
7379
7380         ret = tracing_release(inode, file);
7381
7382         if (file->f_mode & FMODE_READ)
7383                 return ret;
7384
7385         /* If write only, the seq_file is just a stub */
7386         if (m)
7387                 kfree(m->private);
7388         kfree(m);
7389
7390         return 0;
7391 }
7392
7393 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7394 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7395                                     size_t count, loff_t *ppos);
7396 static int tracing_buffers_release(struct inode *inode, struct file *file);
7397 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7398                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7399
7400 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7401 {
7402         struct ftrace_buffer_info *info;
7403         int ret;
7404
7405         /* The following checks for tracefs lockdown */
7406         ret = tracing_buffers_open(inode, filp);
7407         if (ret < 0)
7408                 return ret;
7409
7410         info = filp->private_data;
7411
7412         if (info->iter.trace->use_max_tr) {
7413                 tracing_buffers_release(inode, filp);
7414                 return -EBUSY;
7415         }
7416
7417         info->iter.snapshot = true;
7418         info->iter.array_buffer = &info->iter.tr->max_buffer;
7419
7420         return ret;
7421 }
7422
7423 #endif /* CONFIG_TRACER_SNAPSHOT */
7424
7425
7426 static const struct file_operations tracing_thresh_fops = {
7427         .open           = tracing_open_generic,
7428         .read           = tracing_thresh_read,
7429         .write          = tracing_thresh_write,
7430         .llseek         = generic_file_llseek,
7431 };
7432
7433 #ifdef CONFIG_TRACER_MAX_TRACE
7434 static const struct file_operations tracing_max_lat_fops = {
7435         .open           = tracing_open_generic_tr,
7436         .read           = tracing_max_lat_read,
7437         .write          = tracing_max_lat_write,
7438         .llseek         = generic_file_llseek,
7439         .release        = tracing_release_generic_tr,
7440 };
7441 #endif
7442
7443 static const struct file_operations set_tracer_fops = {
7444         .open           = tracing_open_generic_tr,
7445         .read           = tracing_set_trace_read,
7446         .write          = tracing_set_trace_write,
7447         .llseek         = generic_file_llseek,
7448         .release        = tracing_release_generic_tr,
7449 };
7450
7451 static const struct file_operations tracing_pipe_fops = {
7452         .open           = tracing_open_pipe,
7453         .poll           = tracing_poll_pipe,
7454         .read           = tracing_read_pipe,
7455         .splice_read    = tracing_splice_read_pipe,
7456         .release        = tracing_release_pipe,
7457 };
7458
7459 static const struct file_operations tracing_entries_fops = {
7460         .open           = tracing_open_generic_tr,
7461         .read           = tracing_entries_read,
7462         .write          = tracing_entries_write,
7463         .llseek         = generic_file_llseek,
7464         .release        = tracing_release_generic_tr,
7465 };
7466
7467 static const struct file_operations tracing_buffer_meta_fops = {
7468         .open           = tracing_buffer_meta_open,
7469         .read           = seq_read,
7470         .llseek         = seq_lseek,
7471         .release        = tracing_seq_release,
7472 };
7473
7474 static const struct file_operations tracing_total_entries_fops = {
7475         .open           = tracing_open_generic_tr,
7476         .read           = tracing_total_entries_read,
7477         .llseek         = generic_file_llseek,
7478         .release        = tracing_release_generic_tr,
7479 };
7480
7481 static const struct file_operations tracing_free_buffer_fops = {
7482         .open           = tracing_open_generic_tr,
7483         .write          = tracing_free_buffer_write,
7484         .release        = tracing_free_buffer_release,
7485 };
7486
7487 static const struct file_operations tracing_mark_fops = {
7488         .open           = tracing_mark_open,
7489         .write          = tracing_mark_write,
7490         .release        = tracing_release_generic_tr,
7491 };
7492
7493 static const struct file_operations tracing_mark_raw_fops = {
7494         .open           = tracing_mark_open,
7495         .write          = tracing_mark_raw_write,
7496         .release        = tracing_release_generic_tr,
7497 };
7498
7499 static const struct file_operations trace_clock_fops = {
7500         .open           = tracing_clock_open,
7501         .read           = seq_read,
7502         .llseek         = seq_lseek,
7503         .release        = tracing_single_release_tr,
7504         .write          = tracing_clock_write,
7505 };
7506
7507 static const struct file_operations trace_time_stamp_mode_fops = {
7508         .open           = tracing_time_stamp_mode_open,
7509         .read           = seq_read,
7510         .llseek         = seq_lseek,
7511         .release        = tracing_single_release_tr,
7512 };
7513
7514 static const struct file_operations last_boot_fops = {
7515         .open           = tracing_open_generic_tr,
7516         .read           = tracing_last_boot_read,
7517         .llseek         = generic_file_llseek,
7518         .release        = tracing_release_generic_tr,
7519 };
7520
7521 #ifdef CONFIG_TRACER_SNAPSHOT
7522 static const struct file_operations snapshot_fops = {
7523         .open           = tracing_snapshot_open,
7524         .read           = seq_read,
7525         .write          = tracing_snapshot_write,
7526         .llseek         = tracing_lseek,
7527         .release        = tracing_snapshot_release,
7528 };
7529
7530 static const struct file_operations snapshot_raw_fops = {
7531         .open           = snapshot_raw_open,
7532         .read           = tracing_buffers_read,
7533         .release        = tracing_buffers_release,
7534         .splice_read    = tracing_buffers_splice_read,
7535 };
7536
7537 #endif /* CONFIG_TRACER_SNAPSHOT */
7538
7539 /*
7540  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7541  * @filp: The active open file structure
7542  * @ubuf: The user space buffer containing the value to write
7543  * @cnt: The number of bytes to write
7544  * @ppos: The current "file" position
7545  *
7546  * This function implements the write interface for a struct trace_min_max_param.
7547  * The filp->private_data must point to a trace_min_max_param structure that
7548  * defines where to write the value, the min and the max acceptable values,
7549  * and a lock to protect the write.
7550  */
7551 static ssize_t
7552 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554         struct trace_min_max_param *param = filp->private_data;
7555         u64 val;
7556         int err;
7557
7558         if (!param)
7559                 return -EFAULT;
7560
7561         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7562         if (err)
7563                 return err;
7564
7565         if (param->lock)
7566                 mutex_lock(param->lock);
7567
7568         if (param->min && val < *param->min)
7569                 err = -EINVAL;
7570
7571         if (param->max && val > *param->max)
7572                 err = -EINVAL;
7573
7574         if (!err)
7575                 *param->val = val;
7576
7577         if (param->lock)
7578                 mutex_unlock(param->lock);
7579
7580         if (err)
7581                 return err;
7582
7583         return cnt;
7584 }
7585
7586 /*
7587  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7588  * @filp: The active open file structure
7589  * @ubuf: The userspace provided buffer to read value into
7590  * @cnt: The maximum number of bytes to read
7591  * @ppos: The current "file" position
7592  *
7593  * This function implements the read interface for a struct trace_min_max_param.
7594  * The filp->private_data must point to a trace_min_max_param struct with valid
7595  * data.
7596  */
7597 static ssize_t
7598 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7599 {
7600         struct trace_min_max_param *param = filp->private_data;
7601         char buf[U64_STR_SIZE];
7602         int len;
7603         u64 val;
7604
7605         if (!param)
7606                 return -EFAULT;
7607
7608         val = *param->val;
7609
7610         if (cnt > sizeof(buf))
7611                 cnt = sizeof(buf);
7612
7613         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7614
7615         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7616 }
7617
7618 const struct file_operations trace_min_max_fops = {
7619         .open           = tracing_open_generic,
7620         .read           = trace_min_max_read,
7621         .write          = trace_min_max_write,
7622 };
7623
7624 #define TRACING_LOG_ERRS_MAX    8
7625 #define TRACING_LOG_LOC_MAX     128
7626
7627 #define CMD_PREFIX "  Command: "
7628
7629 struct err_info {
7630         const char      **errs; /* ptr to loc-specific array of err strings */
7631         u8              type;   /* index into errs -> specific err string */
7632         u16             pos;    /* caret position */
7633         u64             ts;
7634 };
7635
7636 struct tracing_log_err {
7637         struct list_head        list;
7638         struct err_info         info;
7639         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7640         char                    *cmd;                     /* what caused err */
7641 };
7642
7643 static DEFINE_MUTEX(tracing_err_log_lock);
7644
7645 static struct tracing_log_err *alloc_tracing_log_err(int len)
7646 {
7647         struct tracing_log_err *err;
7648
7649         err = kzalloc(sizeof(*err), GFP_KERNEL);
7650         if (!err)
7651                 return ERR_PTR(-ENOMEM);
7652
7653         err->cmd = kzalloc(len, GFP_KERNEL);
7654         if (!err->cmd) {
7655                 kfree(err);
7656                 return ERR_PTR(-ENOMEM);
7657         }
7658
7659         return err;
7660 }
7661
7662 static void free_tracing_log_err(struct tracing_log_err *err)
7663 {
7664         kfree(err->cmd);
7665         kfree(err);
7666 }
7667
7668 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7669                                                    int len)
7670 {
7671         struct tracing_log_err *err;
7672         char *cmd;
7673
7674         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7675                 err = alloc_tracing_log_err(len);
7676                 if (PTR_ERR(err) != -ENOMEM)
7677                         tr->n_err_log_entries++;
7678
7679                 return err;
7680         }
7681         cmd = kzalloc(len, GFP_KERNEL);
7682         if (!cmd)
7683                 return ERR_PTR(-ENOMEM);
7684         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7685         kfree(err->cmd);
7686         err->cmd = cmd;
7687         list_del(&err->list);
7688
7689         return err;
7690 }
7691
7692 /**
7693  * err_pos - find the position of a string within a command for error careting
7694  * @cmd: The tracing command that caused the error
7695  * @str: The string to position the caret at within @cmd
7696  *
7697  * Finds the position of the first occurrence of @str within @cmd.  The
7698  * return value can be passed to tracing_log_err() for caret placement
7699  * within @cmd.
7700  *
7701  * Returns the index within @cmd of the first occurrence of @str or 0
7702  * if @str was not found.
7703  */
7704 unsigned int err_pos(char *cmd, const char *str)
7705 {
7706         char *found;
7707
7708         if (WARN_ON(!strlen(cmd)))
7709                 return 0;
7710
7711         found = strstr(cmd, str);
7712         if (found)
7713                 return found - cmd;
7714
7715         return 0;
7716 }
7717
7718 /**
7719  * tracing_log_err - write an error to the tracing error log
7720  * @tr: The associated trace array for the error (NULL for top level array)
7721  * @loc: A string describing where the error occurred
7722  * @cmd: The tracing command that caused the error
7723  * @errs: The array of loc-specific static error strings
7724  * @type: The index into errs[], which produces the specific static err string
7725  * @pos: The position the caret should be placed in the cmd
7726  *
7727  * Writes an error into tracing/error_log of the form:
7728  *
7729  * <loc>: error: <text>
7730  *   Command: <cmd>
7731  *              ^
7732  *
7733  * tracing/error_log is a small log file containing the last
7734  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7735  * unless there has been a tracing error, and the error log can be
7736  * cleared and have its memory freed by writing the empty string in
7737  * truncation mode to it, i.e. echo > tracing/error_log.
7738  *
7739  * NOTE: the @errs array along with the @type param is used to
7740  * produce a static error string; this string is not copied and saved
7741  * when the error is logged, only a pointer to it is saved.  See
7742  * existing callers for examples of how static strings are typically
7743  * defined for use with tracing_log_err().
7744  */
7745 void tracing_log_err(struct trace_array *tr,
7746                      const char *loc, const char *cmd,
7747                      const char **errs, u8 type, u16 pos)
7748 {
7749         struct tracing_log_err *err;
7750         int len = 0;
7751
7752         if (!tr)
7753                 tr = &global_trace;
7754
7755         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7756
7757         mutex_lock(&tracing_err_log_lock);
7758         err = get_tracing_log_err(tr, len);
7759         if (PTR_ERR(err) == -ENOMEM) {
7760                 mutex_unlock(&tracing_err_log_lock);
7761                 return;
7762         }
7763
7764         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7765         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7766
7767         err->info.errs = errs;
7768         err->info.type = type;
7769         err->info.pos = pos;
7770         err->info.ts = local_clock();
7771
7772         list_add_tail(&err->list, &tr->err_log);
7773         mutex_unlock(&tracing_err_log_lock);
7774 }
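/*
 * Illustrative sketch (not taken from an actual caller): a hypothetical
 * command parser would typically keep a static array of error strings
 * and log a parse failure with the caret placed under the offending
 * token, along these lines (type 0 selects "Unknown keyword" below):
 *
 *	static const char *my_cmd_errs[] = {
 *		"Unknown keyword",
 *		"Missing argument",
 *	};
 *
 *	static void my_cmd_log_err(struct trace_array *tr, char *cmd,
 *				   const char *bad_tok)
 *	{
 *		tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
 *				0, err_pos(cmd, bad_tok));
 *	}
 *
 * The names my_cmd_errs, my_cmd_log_err and bad_tok are made up for
 * illustration; see existing callers for the real patterns.  The
 * resulting entries can be read from tracing/error_log and cleared
 * with "echo > tracing/error_log".
 */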
7775
7776 static void clear_tracing_err_log(struct trace_array *tr)
7777 {
7778         struct tracing_log_err *err, *next;
7779
7780         mutex_lock(&tracing_err_log_lock);
7781         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7782                 list_del(&err->list);
7783                 free_tracing_log_err(err);
7784         }
7785
7786         tr->n_err_log_entries = 0;
7787         mutex_unlock(&tracing_err_log_lock);
7788 }
7789
7790 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7791 {
7792         struct trace_array *tr = m->private;
7793
7794         mutex_lock(&tracing_err_log_lock);
7795
7796         return seq_list_start(&tr->err_log, *pos);
7797 }
7798
7799 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7800 {
7801         struct trace_array *tr = m->private;
7802
7803         return seq_list_next(v, &tr->err_log, pos);
7804 }
7805
7806 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7807 {
7808         mutex_unlock(&tracing_err_log_lock);
7809 }
7810
7811 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7812 {
7813         u16 i;
7814
7815         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7816                 seq_putc(m, ' ');
7817         for (i = 0; i < pos; i++)
7818                 seq_putc(m, ' ');
7819         seq_puts(m, "^\n");
7820 }
7821
7822 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7823 {
7824         struct tracing_log_err *err = v;
7825
7826         if (err) {
7827                 const char *err_text = err->info.errs[err->info.type];
7828                 u64 sec = err->info.ts;
7829                 u32 nsec;
7830
7831                 nsec = do_div(sec, NSEC_PER_SEC);
7832                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7833                            err->loc, err_text);
7834                 seq_printf(m, "%s", err->cmd);
7835                 tracing_err_log_show_pos(m, err->info.pos);
7836         }
7837
7838         return 0;
7839 }
7840
7841 static const struct seq_operations tracing_err_log_seq_ops = {
7842         .start  = tracing_err_log_seq_start,
7843         .next   = tracing_err_log_seq_next,
7844         .stop   = tracing_err_log_seq_stop,
7845         .show   = tracing_err_log_seq_show
7846 };
7847
7848 static int tracing_err_log_open(struct inode *inode, struct file *file)
7849 {
7850         struct trace_array *tr = inode->i_private;
7851         int ret = 0;
7852
7853         ret = tracing_check_open_get_tr(tr);
7854         if (ret)
7855                 return ret;
7856
7857         /* If this file was opened for write, then erase contents */
7858         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7859                 clear_tracing_err_log(tr);
7860
7861         if (file->f_mode & FMODE_READ) {
7862                 ret = seq_open(file, &tracing_err_log_seq_ops);
7863                 if (!ret) {
7864                         struct seq_file *m = file->private_data;
7865                         m->private = tr;
7866                 } else {
7867                         trace_array_put(tr);
7868                 }
7869         }
7870         return ret;
7871 }
7872
7873 static ssize_t tracing_err_log_write(struct file *file,
7874                                      const char __user *buffer,
7875                                      size_t count, loff_t *ppos)
7876 {
7877         return count;
7878 }
7879
7880 static int tracing_err_log_release(struct inode *inode, struct file *file)
7881 {
7882         struct trace_array *tr = inode->i_private;
7883
7884         trace_array_put(tr);
7885
7886         if (file->f_mode & FMODE_READ)
7887                 seq_release(inode, file);
7888
7889         return 0;
7890 }
7891
7892 static const struct file_operations tracing_err_log_fops = {
7893         .open           = tracing_err_log_open,
7894         .write          = tracing_err_log_write,
7895         .read           = seq_read,
7896         .llseek         = tracing_lseek,
7897         .release        = tracing_err_log_release,
7898 };
7899
7900 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7901 {
7902         struct trace_array *tr = inode->i_private;
7903         struct ftrace_buffer_info *info;
7904         int ret;
7905
7906         ret = tracing_check_open_get_tr(tr);
7907         if (ret)
7908                 return ret;
7909
7910         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7911         if (!info) {
7912                 trace_array_put(tr);
7913                 return -ENOMEM;
7914         }
7915
7916         mutex_lock(&trace_types_lock);
7917
7918         info->iter.tr           = tr;
7919         info->iter.cpu_file     = tracing_get_cpu(inode);
7920         info->iter.trace        = tr->current_trace;
7921         info->iter.array_buffer = &tr->array_buffer;
7922         info->spare             = NULL;
7923         /* Force reading ring buffer for first read */
7924         info->read              = (unsigned int)-1;
7925
7926         filp->private_data = info;
7927
7928         tr->trace_ref++;
7929
7930         mutex_unlock(&trace_types_lock);
7931
7932         ret = nonseekable_open(inode, filp);
7933         if (ret < 0)
7934                 trace_array_put(tr);
7935
7936         return ret;
7937 }
7938
7939 static __poll_t
7940 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7941 {
7942         struct ftrace_buffer_info *info = filp->private_data;
7943         struct trace_iterator *iter = &info->iter;
7944
7945         return trace_poll(iter, filp, poll_table);
7946 }
7947
7948 static ssize_t
7949 tracing_buffers_read(struct file *filp, char __user *ubuf,
7950                      size_t count, loff_t *ppos)
7951 {
7952         struct ftrace_buffer_info *info = filp->private_data;
7953         struct trace_iterator *iter = &info->iter;
7954         void *trace_data;
7955         int page_size;
7956         ssize_t ret = 0;
7957         ssize_t size;
7958
7959         if (!count)
7960                 return 0;
7961
7962 #ifdef CONFIG_TRACER_MAX_TRACE
7963         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7964                 return -EBUSY;
7965 #endif
7966
7967         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7968
7969         /* Make sure the spare matches the current sub buffer size */
7970         if (info->spare) {
7971                 if (page_size != info->spare_size) {
7972                         ring_buffer_free_read_page(iter->array_buffer->buffer,
7973                                                    info->spare_cpu, info->spare);
7974                         info->spare = NULL;
7975                 }
7976         }
7977
7978         if (!info->spare) {
7979                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7980                                                           iter->cpu_file);
7981                 if (IS_ERR(info->spare)) {
7982                         ret = PTR_ERR(info->spare);
7983                         info->spare = NULL;
7984                 } else {
7985                         info->spare_cpu = iter->cpu_file;
7986                         info->spare_size = page_size;
7987                 }
7988         }
7989         if (!info->spare)
7990                 return ret;
7991
7992         /* Do we have previous read data to read? */
7993         if (info->read < page_size)
7994                 goto read;
7995
7996  again:
7997         trace_access_lock(iter->cpu_file);
7998         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7999                                     info->spare,
8000                                     count,
8001                                     iter->cpu_file, 0);
8002         trace_access_unlock(iter->cpu_file);
8003
8004         if (ret < 0) {
8005                 if (trace_empty(iter) && !iter->closed) {
8006                         if ((filp->f_flags & O_NONBLOCK))
8007                                 return -EAGAIN;
8008
8009                         ret = wait_on_pipe(iter, 0);
8010                         if (ret)
8011                                 return ret;
8012
8013                         goto again;
8014                 }
8015                 return 0;
8016         }
8017
8018         info->read = 0;
8019  read:
8020         size = page_size - info->read;
8021         if (size > count)
8022                 size = count;
8023         trace_data = ring_buffer_read_page_data(info->spare);
8024         ret = copy_to_user(ubuf, trace_data + info->read, size);
8025         if (ret == size)
8026                 return -EFAULT;
8027
8028         size -= ret;
8029
8030         *ppos += size;
8031         info->read += size;
8032
8033         return size;
8034 }
8035
8036 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8037 {
8038         struct ftrace_buffer_info *info = file->private_data;
8039         struct trace_iterator *iter = &info->iter;
8040
8041         iter->closed = true;
8042         /* Make sure the waiters see the new wait_index */
8043         (void)atomic_fetch_inc_release(&iter->wait_index);
8044
8045         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8046
8047         return 0;
8048 }
8049
8050 static int tracing_buffers_release(struct inode *inode, struct file *file)
8051 {
8052         struct ftrace_buffer_info *info = file->private_data;
8053         struct trace_iterator *iter = &info->iter;
8054
8055         mutex_lock(&trace_types_lock);
8056
8057         iter->tr->trace_ref--;
8058
8059         __trace_array_put(iter->tr);
8060
8061         if (info->spare)
8062                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8063                                            info->spare_cpu, info->spare);
8064         kvfree(info);
8065
8066         mutex_unlock(&trace_types_lock);
8067
8068         return 0;
8069 }
8070
8071 struct buffer_ref {
8072         struct trace_buffer     *buffer;
8073         void                    *page;
8074         int                     cpu;
8075         refcount_t              refcount;
8076 };
8077
8078 static void buffer_ref_release(struct buffer_ref *ref)
8079 {
8080         if (!refcount_dec_and_test(&ref->refcount))
8081                 return;
8082         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8083         kfree(ref);
8084 }
8085
8086 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8087                                     struct pipe_buffer *buf)
8088 {
8089         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8090
8091         buffer_ref_release(ref);
8092         buf->private = 0;
8093 }
8094
8095 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8096                                 struct pipe_buffer *buf)
8097 {
8098         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8099
8100         if (refcount_read(&ref->refcount) > INT_MAX/2)
8101                 return false;
8102
8103         refcount_inc(&ref->refcount);
8104         return true;
8105 }
8106
8107 /* Pipe buffer operations for a buffer. */
8108 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8109         .release                = buffer_pipe_buf_release,
8110         .get                    = buffer_pipe_buf_get,
8111 };
8112
8113 /*
8114  * Callback from splice_to_pipe(): releases some pages at the end of
8115  * the spd in case we errored out while filling the pipe.
8116  */
8117 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8118 {
8119         struct buffer_ref *ref =
8120                 (struct buffer_ref *)spd->partial[i].private;
8121
8122         buffer_ref_release(ref);
8123         spd->partial[i].private = 0;
8124 }
8125
8126 static ssize_t
8127 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8128                             struct pipe_inode_info *pipe, size_t len,
8129                             unsigned int flags)
8130 {
8131         struct ftrace_buffer_info *info = file->private_data;
8132         struct trace_iterator *iter = &info->iter;
8133         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8134         struct page *pages_def[PIPE_DEF_BUFFERS];
8135         struct splice_pipe_desc spd = {
8136                 .pages          = pages_def,
8137                 .partial        = partial_def,
8138                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8139                 .ops            = &buffer_pipe_buf_ops,
8140                 .spd_release    = buffer_spd_release,
8141         };
8142         struct buffer_ref *ref;
8143         bool woken = false;
8144         int page_size;
8145         int entries, i;
8146         ssize_t ret = 0;
8147
8148 #ifdef CONFIG_TRACER_MAX_TRACE
8149         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8150                 return -EBUSY;
8151 #endif
8152
8153         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8154         if (*ppos & (page_size - 1))
8155                 return -EINVAL;
8156
8157         if (len & (page_size - 1)) {
8158                 if (len < page_size)
8159                         return -EINVAL;
8160                 len &= (~(page_size - 1));
8161         }
8162
8163         if (splice_grow_spd(pipe, &spd))
8164                 return -ENOMEM;
8165
8166  again:
8167         trace_access_lock(iter->cpu_file);
8168         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8169
8170         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8171                 struct page *page;
8172                 int r;
8173
8174                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8175                 if (!ref) {
8176                         ret = -ENOMEM;
8177                         break;
8178                 }
8179
8180                 refcount_set(&ref->refcount, 1);
8181                 ref->buffer = iter->array_buffer->buffer;
8182                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8183                 if (IS_ERR(ref->page)) {
8184                         ret = PTR_ERR(ref->page);
8185                         ref->page = NULL;
8186                         kfree(ref);
8187                         break;
8188                 }
8189                 ref->cpu = iter->cpu_file;
8190
8191                 r = ring_buffer_read_page(ref->buffer, ref->page,
8192                                           len, iter->cpu_file, 1);
8193                 if (r < 0) {
8194                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8195                                                    ref->page);
8196                         kfree(ref);
8197                         break;
8198                 }
8199
8200                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8201
8202                 spd.pages[i] = page;
8203                 spd.partial[i].len = page_size;
8204                 spd.partial[i].offset = 0;
8205                 spd.partial[i].private = (unsigned long)ref;
8206                 spd.nr_pages++;
8207                 *ppos += page_size;
8208
8209                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8210         }
8211
8212         trace_access_unlock(iter->cpu_file);
8213         spd.nr_pages = i;
8214
8215         /* did we read anything? */
8216         if (!spd.nr_pages) {
8217
8218                 if (ret)
8219                         goto out;
8220
8221                 if (woken)
8222                         goto out;
8223
8224                 ret = -EAGAIN;
8225                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8226                         goto out;
8227
8228                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8229                 if (ret)
8230                         goto out;
8231
8232                 /* No need to wait after waking up when tracing is off */
8233                 if (!tracer_tracing_is_on(iter->tr))
8234                         goto out;
8235
8236                 /* Iterate one more time to collect any new data then exit */
8237                 woken = true;
8238
8239                 goto again;
8240         }
8241
8242         ret = splice_to_pipe(pipe, &spd);
8243 out:
8244         splice_shrink_spd(&spd);
8245
8246         return ret;
8247 }
8248
8249 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8250 {
8251         struct ftrace_buffer_info *info = file->private_data;
8252         struct trace_iterator *iter = &info->iter;
8253         int err;
8254
8255         if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8256                 if (!(file->f_flags & O_NONBLOCK)) {
8257                         err = ring_buffer_wait(iter->array_buffer->buffer,
8258                                                iter->cpu_file,
8259                                                iter->tr->buffer_percent,
8260                                                NULL, NULL);
8261                         if (err)
8262                                 return err;
8263                 }
8264
8265                 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8266                                                   iter->cpu_file);
8267         } else if (cmd) {
8268                 return -ENOTTY;
8269         }
8270
8271         /*
8272          * An ioctl call with cmd 0 to the ring buffer file will wake up all
8273          * waiters
8274          */
8275         mutex_lock(&trace_types_lock);
8276
8277         /* Make sure the waiters see the new wait_index */
8278         (void)atomic_fetch_inc_release(&iter->wait_index);
8279
8280         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8281
8282         mutex_unlock(&trace_types_lock);
8283         return 0;
8284 }
8285
8286 #ifdef CONFIG_TRACER_MAX_TRACE
8287 static int get_snapshot_map(struct trace_array *tr)
8288 {
8289         int err = 0;
8290
8291         /*
8292          * Called with mmap_lock held. lockdep would be unhappy if we now
8293          * took trace_types_lock. Instead, use the more specific
8294          * snapshot_trigger_lock.
8295          */
8296         spin_lock(&tr->snapshot_trigger_lock);
8297
8298         if (tr->snapshot || tr->mapped == UINT_MAX)
8299                 err = -EBUSY;
8300         else
8301                 tr->mapped++;
8302
8303         spin_unlock(&tr->snapshot_trigger_lock);
8304
8305         /* Wait for update_max_tr() to observe iter->tr->mapped */
8306         if (tr->mapped == 1)
8307                 synchronize_rcu();
8308
8309         return err;
8310
8311 }
8312 static void put_snapshot_map(struct trace_array *tr)
8313 {
8314         spin_lock(&tr->snapshot_trigger_lock);
8315         if (!WARN_ON(!tr->mapped))
8316                 tr->mapped--;
8317         spin_unlock(&tr->snapshot_trigger_lock);
8318 }
8319 #else
8320 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8321 static inline void put_snapshot_map(struct trace_array *tr) { }
8322 #endif
8323
8324 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8325 {
8326         struct ftrace_buffer_info *info = vma->vm_file->private_data;
8327         struct trace_iterator *iter = &info->iter;
8328
8329         WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8330         put_snapshot_map(iter->tr);
8331 }
8332
8333 static const struct vm_operations_struct tracing_buffers_vmops = {
8334         .close          = tracing_buffers_mmap_close,
8335 };
8336
8337 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8338 {
8339         struct ftrace_buffer_info *info = filp->private_data;
8340         struct trace_iterator *iter = &info->iter;
8341         int ret = 0;
8342
8343         ret = get_snapshot_map(iter->tr);
8344         if (ret)
8345                 return ret;
8346
8347         ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8348         if (ret)
8349                 put_snapshot_map(iter->tr);
8350
8351         vma->vm_ops = &tracing_buffers_vmops;
8352
8353         return ret;
8354 }
8355
8356 static const struct file_operations tracing_buffers_fops = {
8357         .open           = tracing_buffers_open,
8358         .read           = tracing_buffers_read,
8359         .poll           = tracing_buffers_poll,
8360         .release        = tracing_buffers_release,
8361         .flush          = tracing_buffers_flush,
8362         .splice_read    = tracing_buffers_splice_read,
8363         .unlocked_ioctl = tracing_buffers_ioctl,
8364         .mmap           = tracing_buffers_mmap,
8365 };
8366
8367 static ssize_t
8368 tracing_stats_read(struct file *filp, char __user *ubuf,
8369                    size_t count, loff_t *ppos)
8370 {
8371         struct inode *inode = file_inode(filp);
8372         struct trace_array *tr = inode->i_private;
8373         struct array_buffer *trace_buf = &tr->array_buffer;
8374         int cpu = tracing_get_cpu(inode);
8375         struct trace_seq *s;
8376         unsigned long cnt;
8377         unsigned long long t;
8378         unsigned long usec_rem;
8379
8380         s = kmalloc(sizeof(*s), GFP_KERNEL);
8381         if (!s)
8382                 return -ENOMEM;
8383
8384         trace_seq_init(s);
8385
8386         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8387         trace_seq_printf(s, "entries: %ld\n", cnt);
8388
8389         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8390         trace_seq_printf(s, "overrun: %ld\n", cnt);
8391
8392         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8393         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8394
8395         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8396         trace_seq_printf(s, "bytes: %ld\n", cnt);
8397
8398         if (trace_clocks[tr->clock_id].in_ns) {
8399                 /* local or global for trace_clock */
8400                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8401                 usec_rem = do_div(t, USEC_PER_SEC);
8402                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8403                                                                 t, usec_rem);
8404
8405                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8406                 usec_rem = do_div(t, USEC_PER_SEC);
8407                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8408         } else {
8409                 /* counter or tsc mode for trace_clock */
8410                 trace_seq_printf(s, "oldest event ts: %llu\n",
8411                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8412
8413                 trace_seq_printf(s, "now ts: %llu\n",
8414                                 ring_buffer_time_stamp(trace_buf->buffer));
8415         }
8416
8417         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8418         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8419
8420         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8421         trace_seq_printf(s, "read events: %ld\n", cnt);
8422
8423         count = simple_read_from_buffer(ubuf, count, ppos,
8424                                         s->buffer, trace_seq_used(s));
8425
8426         kfree(s);
8427
8428         return count;
8429 }
8430
8431 static const struct file_operations tracing_stats_fops = {
8432         .open           = tracing_open_generic_tr,
8433         .read           = tracing_stats_read,
8434         .llseek         = generic_file_llseek,
8435         .release        = tracing_release_generic_tr,
8436 };
8437
8438 #ifdef CONFIG_DYNAMIC_FTRACE
8439
8440 static ssize_t
8441 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8442                   size_t cnt, loff_t *ppos)
8443 {
8444         ssize_t ret;
8445         char *buf;
8446         int r;
8447
8448         /* 512 should be plenty to hold the amount needed */
8449 #define DYN_INFO_BUF_SIZE       512
8450
8451         buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8452         if (!buf)
8453                 return -ENOMEM;
8454
8455         r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8456                       "%ld pages:%ld groups: %ld\n"
8457                       "ftrace boot update time = %llu (ns)\n"
8458                       "ftrace module total update time = %llu (ns)\n",
8459                       ftrace_update_tot_cnt,
8460                       ftrace_number_of_pages,
8461                       ftrace_number_of_groups,
8462                       ftrace_update_time,
8463                       ftrace_total_mod_time);
8464
8465         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8466         kfree(buf);
8467         return ret;
8468 }
8469
8470 static const struct file_operations tracing_dyn_info_fops = {
8471         .open           = tracing_open_generic,
8472         .read           = tracing_read_dyn_info,
8473         .llseek         = generic_file_llseek,
8474 };
8475 #endif /* CONFIG_DYNAMIC_FTRACE */
8476
8477 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8478 static void
8479 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8480                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8481                 void *data)
8482 {
8483         tracing_snapshot_instance(tr);
8484 }
8485
8486 static void
8487 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8488                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8489                       void *data)
8490 {
8491         struct ftrace_func_mapper *mapper = data;
8492         long *count = NULL;
8493
8494         if (mapper)
8495                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8496
8497         if (count) {
8498
8499                 if (*count <= 0)
8500                         return;
8501
8502                 (*count)--;
8503         }
8504
8505         tracing_snapshot_instance(tr);
8506 }
8507
8508 static int
8509 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8510                       struct ftrace_probe_ops *ops, void *data)
8511 {
8512         struct ftrace_func_mapper *mapper = data;
8513         long *count = NULL;
8514
8515         seq_printf(m, "%ps:", (void *)ip);
8516
8517         seq_puts(m, "snapshot");
8518
8519         if (mapper)
8520                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8521
8522         if (count)
8523                 seq_printf(m, ":count=%ld\n", *count);
8524         else
8525                 seq_puts(m, ":unlimited\n");
8526
8527         return 0;
8528 }
8529
8530 static int
8531 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8532                      unsigned long ip, void *init_data, void **data)
8533 {
8534         struct ftrace_func_mapper *mapper = *data;
8535
8536         if (!mapper) {
8537                 mapper = allocate_ftrace_func_mapper();
8538                 if (!mapper)
8539                         return -ENOMEM;
8540                 *data = mapper;
8541         }
8542
8543         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8544 }
8545
8546 static void
8547 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8548                      unsigned long ip, void *data)
8549 {
8550         struct ftrace_func_mapper *mapper = data;
8551
8552         if (!ip) {
8553                 if (!mapper)
8554                         return;
8555                 free_ftrace_func_mapper(mapper, NULL);
8556                 return;
8557         }
8558
8559         ftrace_func_mapper_remove_ip(mapper, ip);
8560 }
8561
8562 static struct ftrace_probe_ops snapshot_probe_ops = {
8563         .func                   = ftrace_snapshot,
8564         .print                  = ftrace_snapshot_print,
8565 };
8566
8567 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8568         .func                   = ftrace_count_snapshot,
8569         .print                  = ftrace_snapshot_print,
8570         .init                   = ftrace_snapshot_init,
8571         .free                   = ftrace_snapshot_free,
8572 };
8573
8574 static int
8575 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8576                                char *glob, char *cmd, char *param, int enable)
8577 {
8578         struct ftrace_probe_ops *ops;
8579         void *count = (void *)-1;
8580         char *number;
8581         int ret;
8582
8583         if (!tr)
8584                 return -ENODEV;
8585
8586         /* hash funcs only work with set_ftrace_filter */
8587         if (!enable)
8588                 return -EINVAL;
8589
8590         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8591
8592         if (glob[0] == '!') {
8593                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8594                 if (!ret)
8595                         tracing_disarm_snapshot(tr);
8596
8597                 return ret;
8598         }
8599
8600         if (!param)
8601                 goto out_reg;
8602
8603         number = strsep(&param, ":");
8604
8605         if (!strlen(number))
8606                 goto out_reg;
8607
8608         /*
8609          * We use the callback data field (which is a pointer)
8610          * as our counter.
8611          */
8612         ret = kstrtoul(number, 0, (unsigned long *)&count);
8613         if (ret)
8614                 return ret;
8615
8616  out_reg:
8617         ret = tracing_arm_snapshot(tr);
8618         if (ret < 0)
8619                 goto out;
8620
8621         ret = register_ftrace_function_probe(glob, tr, ops, count);
8622         if (ret < 0)
8623                 tracing_disarm_snapshot(tr);
8624  out:
8625         return ret < 0 ? ret : 0;
8626 }
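/*
 * Hedged usage sketch (the authoritative syntax is documented in
 * Documentation/trace/ftrace.rst): the callback above implements the
 * "snapshot" command of set_ftrace_filter, e.g.
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter     (snapshot on every hit)
 *	echo 'schedule:snapshot:3' > set_ftrace_filter   (only the first 3 hits)
 *	echo '!schedule:snapshot' > set_ftrace_filter    (remove the probe)
 *
 * where "schedule" is just an example function to attach the probe to.
 */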
8627
8628 static struct ftrace_func_command ftrace_snapshot_cmd = {
8629         .name                   = "snapshot",
8630         .func                   = ftrace_trace_snapshot_callback,
8631 };
8632
8633 static __init int register_snapshot_cmd(void)
8634 {
8635         return register_ftrace_command(&ftrace_snapshot_cmd);
8636 }
8637 #else
8638 static inline __init int register_snapshot_cmd(void) { return 0; }
8639 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8640
8641 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8642 {
8643         if (WARN_ON(!tr->dir))
8644                 return ERR_PTR(-ENODEV);
8645
8646         /* Top directory uses NULL as the parent */
8647         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8648                 return NULL;
8649
8650         /* All sub buffers have a descriptor */
8651         return tr->dir;
8652 }
8653
8654 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8655 {
8656         struct dentry *d_tracer;
8657
8658         if (tr->percpu_dir)
8659                 return tr->percpu_dir;
8660
8661         d_tracer = tracing_get_dentry(tr);
8662         if (IS_ERR(d_tracer))
8663                 return NULL;
8664
8665         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8666
8667         MEM_FAIL(!tr->percpu_dir,
8668                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8669
8670         return tr->percpu_dir;
8671 }
8672
8673 static struct dentry *
8674 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8675                       void *data, long cpu, const struct file_operations *fops)
8676 {
8677         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8678
8679         if (ret) /* See tracing_get_cpu() */
8680                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8681         return ret;
8682 }
8683
8684 static void
8685 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8686 {
8687         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8688         struct dentry *d_cpu;
8689         char cpu_dir[30]; /* 30 characters should be more than enough */
8690
8691         if (!d_percpu)
8692                 return;
8693
8694         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8695         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8696         if (!d_cpu) {
8697                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8698                 return;
8699         }
8700
8701         /* per cpu trace_pipe */
8702         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8703                                 tr, cpu, &tracing_pipe_fops);
8704
8705         /* per cpu trace */
8706         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8707                                 tr, cpu, &tracing_fops);
8708
8709         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8710                                 tr, cpu, &tracing_buffers_fops);
8711
8712         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8713                                 tr, cpu, &tracing_stats_fops);
8714
8715         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8716                                 tr, cpu, &tracing_entries_fops);
8717
8718         if (tr->range_addr_start)
8719                 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8720                                       tr, cpu, &tracing_buffer_meta_fops);
8721 #ifdef CONFIG_TRACER_SNAPSHOT
8722         if (!tr->range_addr_start) {
8723                 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8724                                       tr, cpu, &snapshot_fops);
8725
8726                 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8727                                       tr, cpu, &snapshot_raw_fops);
8728         }
8729 #endif
8730 }
8731
8732 #ifdef CONFIG_FTRACE_SELFTEST
8733 /* Let selftest have access to static functions in this file */
8734 #include "trace_selftest.c"
8735 #endif
8736
8737 static ssize_t
8738 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8739                         loff_t *ppos)
8740 {
8741         struct trace_option_dentry *topt = filp->private_data;
8742         char *buf;
8743
8744         if (topt->flags->val & topt->opt->bit)
8745                 buf = "1\n";
8746         else
8747                 buf = "0\n";
8748
8749         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8750 }
8751
8752 static ssize_t
8753 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8754                          loff_t *ppos)
8755 {
8756         struct trace_option_dentry *topt = filp->private_data;
8757         unsigned long val;
8758         int ret;
8759
8760         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8761         if (ret)
8762                 return ret;
8763
8764         if (val != 0 && val != 1)
8765                 return -EINVAL;
8766
8767         if (!!(topt->flags->val & topt->opt->bit) != val) {
8768                 mutex_lock(&trace_types_lock);
8769                 ret = __set_tracer_option(topt->tr, topt->flags,
8770                                           topt->opt, !val);
8771                 mutex_unlock(&trace_types_lock);
8772                 if (ret)
8773                         return ret;
8774         }
8775
8776         *ppos += cnt;
8777
8778         return cnt;
8779 }
8780
8781 static int tracing_open_options(struct inode *inode, struct file *filp)
8782 {
8783         struct trace_option_dentry *topt = inode->i_private;
8784         int ret;
8785
8786         ret = tracing_check_open_get_tr(topt->tr);
8787         if (ret)
8788                 return ret;
8789
8790         filp->private_data = inode->i_private;
8791         return 0;
8792 }
8793
8794 static int tracing_release_options(struct inode *inode, struct file *file)
8795 {
8796         struct trace_option_dentry *topt = file->private_data;
8797
8798         trace_array_put(topt->tr);
8799         return 0;
8800 }
8801
8802 static const struct file_operations trace_options_fops = {
8803         .open = tracing_open_options,
8804         .read = trace_options_read,
8805         .write = trace_options_write,
8806         .llseek = generic_file_llseek,
8807         .release = tracing_release_options,
8808 };
8809
8810 /*
8811  * In order to pass in both the trace_array descriptor as well as the index
8812  * to the flag that the trace option file represents, the trace_array
8813  * has a character array of trace_flags_index[], which holds the index
8814  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8815  * The address of this character array is passed to the flag option file
8816  * read/write callbacks.
8817  *
8818  * In order to extract both the index and the trace_array descriptor,
8819  * get_tr_index() uses the following algorithm.
8820  *
8821  *   idx = *ptr;
8822  *
8823  * As the pointer points at its own index within the array (remember
8824  * index[1] == 1), dereferencing it yields the index.
8825  *
8826  * Then, to get the trace_array descriptor, we subtract that index
8827  * from the ptr, which gets us to the start of the index array itself.
8828  *
8829  *   ptr - idx == &index[0]
8830  *
8831  * Then a simple container_of() from that pointer gets us to the
8832  * trace_array descriptor.
8833  */
8834 static void get_tr_index(void *data, struct trace_array **ptr,
8835                          unsigned int *pindex)
8836 {
8837         *pindex = *(unsigned char *)data;
8838
8839         *ptr = container_of(data - *pindex, struct trace_array,
8840                             trace_flags_index);
8841 }
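/*
 * Worked example (hypothetical values): if @data points at
 * tr->trace_flags_index[3], then *data == 3, so data - 3 ==
 * &tr->trace_flags_index[0], and
 * container_of(data - 3, struct trace_array, trace_flags_index)
 * recovers the trace_array that owns the option file.
 */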
8842
8843 static ssize_t
8844 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8845                         loff_t *ppos)
8846 {
8847         void *tr_index = filp->private_data;
8848         struct trace_array *tr;
8849         unsigned int index;
8850         char *buf;
8851
8852         get_tr_index(tr_index, &tr, &index);
8853
8854         if (tr->trace_flags & (1 << index))
8855                 buf = "1\n";
8856         else
8857                 buf = "0\n";
8858
8859         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8860 }
8861
8862 static ssize_t
8863 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8864                          loff_t *ppos)
8865 {
8866         void *tr_index = filp->private_data;
8867         struct trace_array *tr;
8868         unsigned int index;
8869         unsigned long val;
8870         int ret;
8871
8872         get_tr_index(tr_index, &tr, &index);
8873
8874         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8875         if (ret)
8876                 return ret;
8877
8878         if (val != 0 && val != 1)
8879                 return -EINVAL;
8880
8881         mutex_lock(&event_mutex);
8882         mutex_lock(&trace_types_lock);
8883         ret = set_tracer_flag(tr, 1 << index, val);
8884         mutex_unlock(&trace_types_lock);
8885         mutex_unlock(&event_mutex);
8886
8887         if (ret < 0)
8888                 return ret;
8889
8890         *ppos += cnt;
8891
8892         return cnt;
8893 }
8894
8895 static const struct file_operations trace_options_core_fops = {
8896         .open = tracing_open_generic,
8897         .read = trace_options_core_read,
8898         .write = trace_options_core_write,
8899         .llseek = generic_file_llseek,
8900 };
8901
8902 struct dentry *trace_create_file(const char *name,
8903                                  umode_t mode,
8904                                  struct dentry *parent,
8905                                  void *data,
8906                                  const struct file_operations *fops)
8907 {
8908         struct dentry *ret;
8909
8910         ret = tracefs_create_file(name, mode, parent, data, fops);
8911         if (!ret)
8912                 pr_warn("Could not create tracefs '%s' entry\n", name);
8913
8914         return ret;
8915 }
8916
8917
8918 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8919 {
8920         struct dentry *d_tracer;
8921
8922         if (tr->options)
8923                 return tr->options;
8924
8925         d_tracer = tracing_get_dentry(tr);
8926         if (IS_ERR(d_tracer))
8927                 return NULL;
8928
8929         tr->options = tracefs_create_dir("options", d_tracer);
8930         if (!tr->options) {
8931                 pr_warn("Could not create tracefs directory 'options'\n");
8932                 return NULL;
8933         }
8934
8935         return tr->options;
8936 }
8937
8938 static void
8939 create_trace_option_file(struct trace_array *tr,
8940                          struct trace_option_dentry *topt,
8941                          struct tracer_flags *flags,
8942                          struct tracer_opt *opt)
8943 {
8944         struct dentry *t_options;
8945
8946         t_options = trace_options_init_dentry(tr);
8947         if (!t_options)
8948                 return;
8949
8950         topt->flags = flags;
8951         topt->opt = opt;
8952         topt->tr = tr;
8953
8954         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8955                                         t_options, topt, &trace_options_fops);
8956
8957 }
8958
8959 static void
8960 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8961 {
8962         struct trace_option_dentry *topts;
8963         struct trace_options *tr_topts;
8964         struct tracer_flags *flags;
8965         struct tracer_opt *opts;
8966         int cnt;
8967         int i;
8968
8969         if (!tracer)
8970                 return;
8971
8972         flags = tracer->flags;
8973
8974         if (!flags || !flags->opts)
8975                 return;
8976
8977         /*
8978          * If this is an instance, only create flags for tracers
8979          * the instance may have.
8980          */
8981         if (!trace_ok_for_array(tracer, tr))
8982                 return;
8983
8984         for (i = 0; i < tr->nr_topts; i++) {
8985                 /* Make sure there are no duplicate flags. */
8986                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8987                         return;
8988         }
8989
8990         opts = flags->opts;
8991
8992         for (cnt = 0; opts[cnt].name; cnt++)
8993                 ;
8994
8995         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8996         if (!topts)
8997                 return;
8998
8999         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9000                             GFP_KERNEL);
9001         if (!tr_topts) {
9002                 kfree(topts);
9003                 return;
9004         }
9005
9006         tr->topts = tr_topts;
9007         tr->topts[tr->nr_topts].tracer = tracer;
9008         tr->topts[tr->nr_topts].topts = topts;
9009         tr->nr_topts++;
9010
9011         for (cnt = 0; opts[cnt].name; cnt++) {
9012                 create_trace_option_file(tr, &topts[cnt], flags,
9013                                          &opts[cnt]);
9014                 MEM_FAIL(topts[cnt].entry == NULL,
9015                           "Failed to create trace option: %s",
9016                           opts[cnt].name);
9017         }
9018 }
9019
9020 static struct dentry *
9021 create_trace_option_core_file(struct trace_array *tr,
9022                               const char *option, long index)
9023 {
9024         struct dentry *t_options;
9025
9026         t_options = trace_options_init_dentry(tr);
9027         if (!t_options)
9028                 return NULL;
9029
9030         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9031                                  (void *)&tr->trace_flags_index[index],
9032                                  &trace_options_core_fops);
9033 }
9034
9035 static void create_trace_options_dir(struct trace_array *tr)
9036 {
9037         struct dentry *t_options;
9038         bool top_level = tr == &global_trace;
9039         int i;
9040
9041         t_options = trace_options_init_dentry(tr);
9042         if (!t_options)
9043                 return;
9044
9045         for (i = 0; trace_options[i]; i++) {
9046                 if (top_level ||
9047                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9048                         create_trace_option_core_file(tr, trace_options[i], i);
9049         }
9050 }
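/*
 * The files created above expose the core trace flags under the tracefs
 * options/ directory, one file per flag, each reading back "0" or "1"
 * and toggled by writing those values (illustrative: "echo 1 >
 * options/sym-addr").  Tracer-specific flags get their files from
 * create_trace_option_files() instead.
 */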
9051
9052 static ssize_t
9053 rb_simple_read(struct file *filp, char __user *ubuf,
9054                size_t cnt, loff_t *ppos)
9055 {
9056         struct trace_array *tr = filp->private_data;
9057         char buf[64];
9058         int r;
9059
9060         r = tracer_tracing_is_on(tr);
9061         r = sprintf(buf, "%d\n", r);
9062
9063         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9064 }
9065
9066 static ssize_t
9067 rb_simple_write(struct file *filp, const char __user *ubuf,
9068                 size_t cnt, loff_t *ppos)
9069 {
9070         struct trace_array *tr = filp->private_data;
9071         struct trace_buffer *buffer = tr->array_buffer.buffer;
9072         unsigned long val;
9073         int ret;
9074
9075         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9076         if (ret)
9077                 return ret;
9078
9079         if (buffer) {
9080                 mutex_lock(&trace_types_lock);
9081                 if (!!val == tracer_tracing_is_on(tr)) {
9082                         val = 0; /* do nothing */
9083                 } else if (val) {
9084                         tracer_tracing_on(tr);
9085                         if (tr->current_trace->start)
9086                                 tr->current_trace->start(tr);
9087                 } else {
9088                         tracer_tracing_off(tr);
9089                         if (tr->current_trace->stop)
9090                                 tr->current_trace->stop(tr);
9091                         /* Wake up any waiters */
9092                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9093                 }
9094                 mutex_unlock(&trace_types_lock);
9095         }
9096
9097         (*ppos)++;
9098
9099         return cnt;
9100 }
9101
9102 static const struct file_operations rb_simple_fops = {
9103         .open           = tracing_open_generic_tr,
9104         .read           = rb_simple_read,
9105         .write          = rb_simple_write,
9106         .release        = tracing_release_generic_tr,
9107         .llseek         = default_llseek,
9108 };
9109
9110 static ssize_t
9111 buffer_percent_read(struct file *filp, char __user *ubuf,
9112                     size_t cnt, loff_t *ppos)
9113 {
9114         struct trace_array *tr = filp->private_data;
9115         char buf[64];
9116         int r;
9117
9118         r = tr->buffer_percent;
9119         r = sprintf(buf, "%d\n", r);
9120
9121         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9122 }
9123
9124 static ssize_t
9125 buffer_percent_write(struct file *filp, const char __user *ubuf,
9126                      size_t cnt, loff_t *ppos)
9127 {
9128         struct trace_array *tr = filp->private_data;
9129         unsigned long val;
9130         int ret;
9131
9132         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9133         if (ret)
9134                 return ret;
9135
9136         if (val > 100)
9137                 return -EINVAL;
9138
9139         tr->buffer_percent = val;
9140
9141         (*ppos)++;
9142
9143         return cnt;
9144 }
9145
9146 static const struct file_operations buffer_percent_fops = {
9147         .open           = tracing_open_generic_tr,
9148         .read           = buffer_percent_read,
9149         .write          = buffer_percent_write,
9150         .release        = tracing_release_generic_tr,
9151         .llseek         = default_llseek,
9152 };
9153
9154 static ssize_t
9155 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9156 {
9157         struct trace_array *tr = filp->private_data;
9158         size_t size;
9159         char buf[64];
9160         int order;
9161         int r;
9162
9163         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9164         size = (PAGE_SIZE << order) / 1024;
9165
9166         r = sprintf(buf, "%zd\n", size);
9167
9168         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9169 }
9170
9171 static ssize_t
9172 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9173                          size_t cnt, loff_t *ppos)
9174 {
9175         struct trace_array *tr = filp->private_data;
9176         unsigned long val;
9177         int old_order;
9178         int order;
9179         int pages;
9180         int ret;
9181
9182         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9183         if (ret)
9184                 return ret;
9185
9186         val *= 1024; /* value passed in is in KB */
9187
9188         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9189         order = fls(pages - 1);
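	/*
	 * Worked example (assuming 4K system pages): writing "8" means
	 * 8 KB, so val = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2 and
	 * order = fls(1) = 1, i.e. sub-buffers of two system pages.
	 */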
9190
9191         /* limit between 1 and 128 system pages */
9192         if (order < 0 || order > 7)
9193                 return -EINVAL;
9194
9195         /* Do not allow tracing while changing the order of the ring buffer */
9196         tracing_stop_tr(tr);
9197
9198         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9199         if (old_order == order)
9200                 goto out;
9201
9202         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9203         if (ret)
9204                 goto out;
9205
9206 #ifdef CONFIG_TRACER_MAX_TRACE
9207
9208         if (!tr->allocated_snapshot)
9209                 goto out_max;
9210
9211         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9212         if (ret) {
9213                 /* Put back the old order */
9214                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9215                 if (WARN_ON_ONCE(cnt)) {
9216                         /*
9217                          * AARGH! We are left with different orders!
9218                          * The max buffer is our "snapshot" buffer.
9219                          * When a tracer needs a snapshot (one of the
9220                          * latency tracers), it swaps the max buffer
9221                          * with the saved snapshot. We succeeded in
9222                          * updating the order of the main buffer, but failed to
9223                          * update the order of the max buffer. But when we tried
9224                          * to reset the main buffer to the original size, we
9225                          * failed there too. This is very unlikely to
9226                          * happen, but if it does, warn and kill all
9227                          * tracing.
9228                          */
9229                         tracing_disabled = 1;
9230                 }
9231                 goto out;
9232         }
9233  out_max:
9234 #endif
9235         (*ppos)++;
9236  out:
9237         if (ret)
9238                 cnt = ret;
9239         tracing_start_tr(tr);
9240         return cnt;
9241 }
9242
9243 static const struct file_operations buffer_subbuf_size_fops = {
9244         .open           = tracing_open_generic_tr,
9245         .read           = buffer_subbuf_size_read,
9246         .write          = buffer_subbuf_size_write,
9247         .release        = tracing_release_generic_tr,
9248         .llseek         = default_llseek,
9249 };
9250
9251 static struct dentry *trace_instance_dir;
9252
9253 static void
9254 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9255
9256 static int
9257 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9258 {
9259         enum ring_buffer_flags rb_flags;
9260
9261         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9262
9263         buf->tr = tr;
9264
9265         if (tr->range_addr_start && tr->range_addr_size) {
9266                 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9267                                                       tr->range_addr_start,
9268                                                       tr->range_addr_size);
9269
9270                 ring_buffer_last_boot_delta(buf->buffer,
9271                                             &tr->text_delta, &tr->data_delta);
9272                 /*
9273                  * This is basically the same as a mapped buffer,
9274                  * with the same restrictions.
9275                  */
9276                 tr->mapped++;
9277         } else {
9278                 buf->buffer = ring_buffer_alloc(size, rb_flags);
9279         }
9280         if (!buf->buffer)
9281                 return -ENOMEM;
9282
9283         buf->data = alloc_percpu(struct trace_array_cpu);
9284         if (!buf->data) {
9285                 ring_buffer_free(buf->buffer);
9286                 buf->buffer = NULL;
9287                 return -ENOMEM;
9288         }
9289
9290         /* Allocate the first page for all buffers */
9291         set_buffer_entries(&tr->array_buffer,
9292                            ring_buffer_size(tr->array_buffer.buffer, 0));
9293
9294         return 0;
9295 }
9296
9297 static void free_trace_buffer(struct array_buffer *buf)
9298 {
9299         if (buf->buffer) {
9300                 ring_buffer_free(buf->buffer);
9301                 buf->buffer = NULL;
9302                 free_percpu(buf->data);
9303                 buf->data = NULL;
9304         }
9305 }
9306
9307 static int allocate_trace_buffers(struct trace_array *tr, int size)
9308 {
9309         int ret;
9310
9311         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9312         if (ret)
9313                 return ret;
9314
9315 #ifdef CONFIG_TRACER_MAX_TRACE
9316         /* Fixed memory mapped buffer trace arrays do not have snapshot buffers */
9317         if (tr->range_addr_start)
9318                 return 0;
9319
9320         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9321                                     allocate_snapshot ? size : 1);
9322         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9323                 free_trace_buffer(&tr->array_buffer);
9324                 return -ENOMEM;
9325         }
9326         tr->allocated_snapshot = allocate_snapshot;
9327
9328         allocate_snapshot = false;
9329 #endif
9330
9331         return 0;
9332 }
9333
9334 static void free_trace_buffers(struct trace_array *tr)
9335 {
9336         if (!tr)
9337                 return;
9338
9339         free_trace_buffer(&tr->array_buffer);
9340
9341 #ifdef CONFIG_TRACER_MAX_TRACE
9342         free_trace_buffer(&tr->max_buffer);
9343 #endif
9344 }
9345
9346 static void init_trace_flags_index(struct trace_array *tr)
9347 {
9348         int i;
9349
9350         /* Used by the trace options files */
9351         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9352                 tr->trace_flags_index[i] = i;
9353 }
9354
9355 static void __update_tracer_options(struct trace_array *tr)
9356 {
9357         struct tracer *t;
9358
9359         for (t = trace_types; t; t = t->next)
9360                 add_tracer_options(tr, t);
9361 }
9362
9363 static void update_tracer_options(struct trace_array *tr)
9364 {
9365         mutex_lock(&trace_types_lock);
9366         tracer_options_updated = true;
9367         __update_tracer_options(tr);
9368         mutex_unlock(&trace_types_lock);
9369 }
9370
9371 /* Must have trace_types_lock held */
9372 struct trace_array *trace_array_find(const char *instance)
9373 {
9374         struct trace_array *tr, *found = NULL;
9375
9376         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9377                 if (tr->name && strcmp(tr->name, instance) == 0) {
9378                         found = tr;
9379                         break;
9380                 }
9381         }
9382
9383         return found;
9384 }
9385
9386 struct trace_array *trace_array_find_get(const char *instance)
9387 {
9388         struct trace_array *tr;
9389
9390         mutex_lock(&trace_types_lock);
9391         tr = trace_array_find(instance);
9392         if (tr)
9393                 tr->ref++;
9394         mutex_unlock(&trace_types_lock);
9395
9396         return tr;
9397 }
9398
9399 static int trace_array_create_dir(struct trace_array *tr)
9400 {
9401         int ret;
9402
9403         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9404         if (!tr->dir)
9405                 return -EINVAL;
9406
9407         ret = event_trace_add_tracer(tr->dir, tr);
9408         if (ret) {
9409                 tracefs_remove(tr->dir);
9410                 return ret;
9411         }
9412
9413         init_tracer_tracefs(tr, tr->dir);
9414         __update_tracer_options(tr);
9415
9416         return ret;
9417 }
9418
9419 static struct trace_array *
9420 trace_array_create_systems(const char *name, const char *systems,
9421                            unsigned long range_addr_start,
9422                            unsigned long range_addr_size)
9423 {
9424         struct trace_array *tr;
9425         int ret;
9426
9427         ret = -ENOMEM;
9428         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9429         if (!tr)
9430                 return ERR_PTR(ret);
9431
9432         tr->name = kstrdup(name, GFP_KERNEL);
9433         if (!tr->name)
9434                 goto out_free_tr;
9435
9436         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9437                 goto out_free_tr;
9438
9439         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9440                 goto out_free_tr;
9441
9442         if (systems) {
9443                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9444                 if (!tr->system_names)
9445                         goto out_free_tr;
9446         }
9447
9448         /* Only for boot up memory mapped ring buffers */
9449         tr->range_addr_start = range_addr_start;
9450         tr->range_addr_size = range_addr_size;
9451
9452         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9453
9454         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9455
9456         raw_spin_lock_init(&tr->start_lock);
9457
9458         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9459 #ifdef CONFIG_TRACER_MAX_TRACE
9460         spin_lock_init(&tr->snapshot_trigger_lock);
9461 #endif
9462         tr->current_trace = &nop_trace;
9463
9464         INIT_LIST_HEAD(&tr->systems);
9465         INIT_LIST_HEAD(&tr->events);
9466         INIT_LIST_HEAD(&tr->hist_vars);
9467         INIT_LIST_HEAD(&tr->err_log);
9468
9469         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9470                 goto out_free_tr;
9471
9472         /* The ring buffer is expanded by default */
9473         trace_set_ring_buffer_expanded(tr);
9474
9475         if (ftrace_allocate_ftrace_ops(tr) < 0)
9476                 goto out_free_tr;
9477
9478         ftrace_init_trace_array(tr);
9479
9480         init_trace_flags_index(tr);
9481
9482         if (trace_instance_dir) {
9483                 ret = trace_array_create_dir(tr);
9484                 if (ret)
9485                         goto out_free_tr;
9486         } else
9487                 __trace_early_add_events(tr);
9488
9489         list_add(&tr->list, &ftrace_trace_arrays);
9490
9491         tr->ref++;
9492
9493         return tr;
9494
9495  out_free_tr:
9496         ftrace_free_ftrace_ops(tr);
9497         free_trace_buffers(tr);
9498         free_cpumask_var(tr->pipe_cpumask);
9499         free_cpumask_var(tr->tracing_cpumask);
9500         kfree_const(tr->system_names);
9501         kfree(tr->name);
9502         kfree(tr);
9503
9504         return ERR_PTR(ret);
9505 }
9506
9507 static struct trace_array *trace_array_create(const char *name)
9508 {
9509         return trace_array_create_systems(name, NULL, 0, 0);
9510 }
9511
9512 static int instance_mkdir(const char *name)
9513 {
9514         struct trace_array *tr;
9515         int ret;
9516
9517         mutex_lock(&event_mutex);
9518         mutex_lock(&trace_types_lock);
9519
9520         ret = -EEXIST;
9521         if (trace_array_find(name))
9522                 goto out_unlock;
9523
9524         tr = trace_array_create(name);
9525
9526         ret = PTR_ERR_OR_ZERO(tr);
9527
9528 out_unlock:
9529         mutex_unlock(&trace_types_lock);
9530         mutex_unlock(&event_mutex);
9531         return ret;
9532 }
9533
9534 static u64 map_pages(u64 start, u64 size)
9535 {
9536         struct page **pages;
9537         phys_addr_t page_start;
9538         unsigned int page_count;
9539         unsigned int i;
9540         void *vaddr;
9541
9542         page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9543
9544         page_start = start;
9545         pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9546         if (!pages)
9547                 return 0;
9548
9549         for (i = 0; i < page_count; i++) {
9550                 phys_addr_t addr = page_start + i * PAGE_SIZE;
9551                 pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9552         }
9553         vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9554         kfree(pages);
9555
9556         return (u64)(unsigned long)vaddr;
9557 }
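/*
 * Worked example (illustrative): map_pages() collects the struct page of
 * every frame in the reserved physical range and hands them to vmap() to
 * get one contiguous kernel virtual mapping.  With a 4 KiB PAGE_SIZE, a
 * 12 MiB reservation gives:
 *
 *	page_count = DIV_ROUND_UP(12 << 20, 4096) = 3072
 *
 * so 3072 page pointers are filled in and vmap() returns a single virtual
 * address covering the whole region (or NULL, in which case 0 is returned
 * to the caller).
 */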
9558
9559 /**
9560  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9561  * @name: The name of the trace array to be looked up/created.
9562  * @systems: A list of systems to create event directories for (NULL for all)
9563  *
9564  * Returns a pointer to the trace array with the given name, or NULL if
9565  * it cannot be created.
9566  *
9567  * NOTE: This function increments the reference counter associated with the
9568  * trace array returned. This makes sure it cannot be freed while in use.
9569  * Use trace_array_put() once the trace array is no longer needed.
9570  * If the trace_array is to be freed, trace_array_destroy() needs to
9571  * be called after the trace_array_put(), or simply let user space delete
9572  * it from the tracefs instances directory. But until the
9573  * trace_array_put() is called, user space cannot delete it.
9574  *
9575  */
9576 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9577 {
9578         struct trace_array *tr;
9579
9580         mutex_lock(&event_mutex);
9581         mutex_lock(&trace_types_lock);
9582
9583         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9584                 if (tr->name && strcmp(tr->name, name) == 0)
9585                         goto out_unlock;
9586         }
9587
9588         tr = trace_array_create_systems(name, systems, 0, 0);
9589
9590         if (IS_ERR(tr))
9591                 tr = NULL;
9592 out_unlock:
9593         if (tr)
9594                 tr->ref++;
9595
9596         mutex_unlock(&trace_types_lock);
9597         mutex_unlock(&event_mutex);
9598         return tr;
9599 }
9600 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
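/*
 * Usage sketch (illustrative only; the instance name, the chosen event and
 * the error handling are hypothetical, while the trace_array_*() calls are
 * the real interfaces): a module that wants its own buffer can do roughly
 * the following:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_inst", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *	trace_array_init_printk(tr);
 *	trace_array_printk(tr, _THIS_IP_, "instance is alive\n");
 *
 *	trace_array_put(tr);
 *
 * The put only drops the reference taken above; removing the instance from
 * tracefs additionally requires trace_array_destroy(), as described in the
 * comment block preceding the function.
 */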
9601
9602 static int __remove_instance(struct trace_array *tr)
9603 {
9604         int i;
9605
9606         /* Reference counter for a newly created trace array = 1. */
9607         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9608                 return -EBUSY;
9609
9610         list_del(&tr->list);
9611
9612         /* Disable all the flags that were enabled coming in */
9613         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9614                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9615                         set_tracer_flag(tr, 1 << i, 0);
9616         }
9617
9618         if (printk_trace == tr)
9619                 update_printk_trace(&global_trace);
9620
9621         tracing_set_nop(tr);
9622         clear_ftrace_function_probes(tr);
9623         event_trace_del_tracer(tr);
9624         ftrace_clear_pids(tr);
9625         ftrace_destroy_function_files(tr);
9626         tracefs_remove(tr->dir);
9627         free_percpu(tr->last_func_repeats);
9628         free_trace_buffers(tr);
9629         clear_tracing_err_log(tr);
9630
9631         for (i = 0; i < tr->nr_topts; i++) {
9632                 kfree(tr->topts[i].topts);
9633         }
9634         kfree(tr->topts);
9635
9636         free_cpumask_var(tr->pipe_cpumask);
9637         free_cpumask_var(tr->tracing_cpumask);
9638         kfree_const(tr->system_names);
9639         kfree(tr->name);
9640         kfree(tr);
9641
9642         return 0;
9643 }
9644
9645 int trace_array_destroy(struct trace_array *this_tr)
9646 {
9647         struct trace_array *tr;
9648         int ret;
9649
9650         if (!this_tr)
9651                 return -EINVAL;
9652
9653         mutex_lock(&event_mutex);
9654         mutex_lock(&trace_types_lock);
9655
9656         ret = -ENODEV;
9657
9658         /* Make sure the trace array exists before destroying it. */
9659         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9660                 if (tr == this_tr) {
9661                         ret = __remove_instance(tr);
9662                         break;
9663                 }
9664         }
9665
9666         mutex_unlock(&trace_types_lock);
9667         mutex_unlock(&event_mutex);
9668
9669         return ret;
9670 }
9671 EXPORT_SYMBOL_GPL(trace_array_destroy);
9672
9673 static int instance_rmdir(const char *name)
9674 {
9675         struct trace_array *tr;
9676         int ret;
9677
9678         mutex_lock(&event_mutex);
9679         mutex_lock(&trace_types_lock);
9680
9681         ret = -ENODEV;
9682         tr = trace_array_find(name);
9683         if (tr)
9684                 ret = __remove_instance(tr);
9685
9686         mutex_unlock(&trace_types_lock);
9687         mutex_unlock(&event_mutex);
9688
9689         return ret;
9690 }
9691
9692 static __init void create_trace_instances(struct dentry *d_tracer)
9693 {
9694         struct trace_array *tr;
9695
9696         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9697                                                          instance_mkdir,
9698                                                          instance_rmdir);
9699         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9700                 return;
9701
9702         mutex_lock(&event_mutex);
9703         mutex_lock(&trace_types_lock);
9704
9705         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9706                 if (!tr->name)
9707                         continue;
9708                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9709                              "Failed to create instance directory\n"))
9710                         break;
9711         }
9712
9713         mutex_unlock(&trace_types_lock);
9714         mutex_unlock(&event_mutex);
9715 }
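/*
 * For illustration (paths assume tracefs is mounted at
 * /sys/kernel/tracing): the directory registered above lets user space
 * manage instances directly:
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # -> instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    # -> instance_rmdir("foo")
 *
 * The rmdir fails with EBUSY while the instance is still referenced
 * (see __remove_instance() above).
 */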
9716
9717 static void
9718 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9719 {
9720         int cpu;
9721
9722         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9723                         tr, &show_traces_fops);
9724
9725         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9726                         tr, &set_tracer_fops);
9727
9728         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9729                           tr, &tracing_cpumask_fops);
9730
9731         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9732                           tr, &tracing_iter_fops);
9733
9734         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9735                           tr, &tracing_fops);
9736
9737         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9738                           tr, &tracing_pipe_fops);
9739
9740         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9741                           tr, &tracing_entries_fops);
9742
9743         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9744                           tr, &tracing_total_entries_fops);
9745
9746         trace_create_file("free_buffer", 0200, d_tracer,
9747                           tr, &tracing_free_buffer_fops);
9748
9749         trace_create_file("trace_marker", 0220, d_tracer,
9750                           tr, &tracing_mark_fops);
9751
9752         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9753
9754         trace_create_file("trace_marker_raw", 0220, d_tracer,
9755                           tr, &tracing_mark_raw_fops);
9756
9757         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9758                           &trace_clock_fops);
9759
9760         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9761                           tr, &rb_simple_fops);
9762
9763         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9764                           &trace_time_stamp_mode_fops);
9765
9766         tr->buffer_percent = 50;
9767
9768         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9769                         tr, &buffer_percent_fops);
9770
9771         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9772                           tr, &buffer_subbuf_size_fops);
9773
9774         create_trace_options_dir(tr);
9775
9776 #ifdef CONFIG_TRACER_MAX_TRACE
9777         trace_create_maxlat_file(tr, d_tracer);
9778 #endif
9779
9780         if (ftrace_create_function_files(tr, d_tracer))
9781                 MEM_FAIL(1, "Could not allocate function filter files");
9782
9783         if (tr->range_addr_start) {
9784                 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9785                                   tr, &last_boot_fops);
9786 #ifdef CONFIG_TRACER_SNAPSHOT
9787         } else {
9788                 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9789                                   tr, &snapshot_fops);
9790 #endif
9791         }
9792
9793         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9794                           tr, &tracing_err_log_fops);
9795
9796         for_each_tracing_cpu(cpu)
9797                 tracing_init_tracefs_percpu(tr, cpu);
9798
9799         ftrace_init_tracefs(tr, d_tracer);
9800 }
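/*
 * For illustration: every control file created above exists once per
 * instance, so (again assuming tracefs at /sys/kernel/tracing) commands
 * such as:
 *
 *	echo 4096 > /sys/kernel/tracing/instances/foo/buffer_size_kb
 *	echo 1 > /sys/kernel/tracing/instances/foo/tracing_on
 *	cat /sys/kernel/tracing/instances/foo/trace_pipe
 *
 * only affect the "foo" instance, while the same files directly under
 * /sys/kernel/tracing act on the global trace array.
 */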
9801
9802 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9803 {
9804         struct vfsmount *mnt;
9805         struct file_system_type *type;
9806
9807         /*
9808          * To maintain backward compatibility for tools that mount
9809          * debugfs to get to the tracing facility, tracefs is automatically
9810          * mounted to the debugfs/tracing directory.
9811          */
9812         type = get_fs_type("tracefs");
9813         if (!type)
9814                 return NULL;
9815         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9816         put_filesystem(type);
9817         if (IS_ERR(mnt))
9818                 return NULL;
9819         mntget(mnt);
9820
9821         return mnt;
9822 }
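/*
 * For example, legacy tooling that only knows about debugfs keeps working
 * because of this automount:
 *
 *	mount -t debugfs none /sys/kernel/debug
 *	ls /sys/kernel/debug/tracing
 *
 * crossing into debugfs/tracing triggers trace_automount() and exposes the
 * same files as /sys/kernel/tracing.
 */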
9823
9824 /**
9825  * tracing_init_dentry - initialize top level trace array
9826  *
9827  * This is called when creating files or directories in the tracing
9828  * directory. It is called via fs_initcall() by any of the boot up code
9829  * and returns 0 on success after ensuring the top level tracing directory exists.
9830  */
9831 int tracing_init_dentry(void)
9832 {
9833         struct trace_array *tr = &global_trace;
9834
9835         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9836                 pr_warn("Tracing disabled due to lockdown\n");
9837                 return -EPERM;
9838         }
9839
9840         /* The top level trace array uses NULL as parent */
9841         if (tr->dir)
9842                 return 0;
9843
9844         if (WARN_ON(!tracefs_initialized()))
9845                 return -ENODEV;
9846
9847         /*
9848          * As there may still be users that expect the tracing
9849          * files to exist in debugfs/tracing, we must automount
9850          * the tracefs file system there, so older tools still
9851          * work with the newer kernel.
9852          */
9853         tr->dir = debugfs_create_automount("tracing", NULL,
9854                                            trace_automount, NULL);
9855
9856         return 0;
9857 }
9858
9859 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9860 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9861
9862 static struct workqueue_struct *eval_map_wq __initdata;
9863 static struct work_struct eval_map_work __initdata;
9864 static struct work_struct tracerfs_init_work __initdata;
9865
9866 static void __init eval_map_work_func(struct work_struct *work)
9867 {
9868         int len;
9869
9870         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9871         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9872 }
9873
9874 static int __init trace_eval_init(void)
9875 {
9876         INIT_WORK(&eval_map_work, eval_map_work_func);
9877
9878         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9879         if (!eval_map_wq) {
9880                 pr_err("Unable to allocate eval_map_wq\n");
9881                 /* Do work here */
9882                 eval_map_work_func(&eval_map_work);
9883                 return -ENOMEM;
9884         }
9885
9886         queue_work(eval_map_wq, &eval_map_work);
9887         return 0;
9888 }
9889
9890 subsys_initcall(trace_eval_init);
9891
9892 static int __init trace_eval_sync(void)
9893 {
9894         /* Make sure the eval map updates are finished */
9895         if (eval_map_wq)
9896                 destroy_workqueue(eval_map_wq);
9897         return 0;
9898 }
9899
9900 late_initcall_sync(trace_eval_sync);
9901
9902
9903 #ifdef CONFIG_MODULES
9904 static void trace_module_add_evals(struct module *mod)
9905 {
9906         if (!mod->num_trace_evals)
9907                 return;
9908
9909         /*
9910          * Modules with bad taint do not have events created, so do
9911          * not bother with their enums (eval maps) either.
9912          */
9913         if (trace_module_has_bad_taint(mod))
9914                 return;
9915
9916         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9917 }
9918
9919 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9920 static void trace_module_remove_evals(struct module *mod)
9921 {
9922         union trace_eval_map_item *map;
9923         union trace_eval_map_item **last = &trace_eval_maps;
9924
9925         if (!mod->num_trace_evals)
9926                 return;
9927
9928         mutex_lock(&trace_eval_mutex);
9929
9930         map = trace_eval_maps;
9931
9932         while (map) {
9933                 if (map->head.mod == mod)
9934                         break;
9935                 map = trace_eval_jmp_to_tail(map);
9936                 last = &map->tail.next;
9937                 map = map->tail.next;
9938         }
9939         if (!map)
9940                 goto out;
9941
9942         *last = trace_eval_jmp_to_tail(map)->tail.next;
9943         kfree(map);
9944  out:
9945         mutex_unlock(&trace_eval_mutex);
9946 }
9947 #else
9948 static inline void trace_module_remove_evals(struct module *mod) { }
9949 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9950
9951 static int trace_module_notify(struct notifier_block *self,
9952                                unsigned long val, void *data)
9953 {
9954         struct module *mod = data;
9955
9956         switch (val) {
9957         case MODULE_STATE_COMING:
9958                 trace_module_add_evals(mod);
9959                 break;
9960         case MODULE_STATE_GOING:
9961                 trace_module_remove_evals(mod);
9962                 break;
9963         }
9964
9965         return NOTIFY_OK;
9966 }
9967
9968 static struct notifier_block trace_module_nb = {
9969         .notifier_call = trace_module_notify,
9970         .priority = 0,
9971 };
9972 #endif /* CONFIG_MODULES */
9973
9974 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9975 {
9976
9977         event_trace_init();
9978
9979         init_tracer_tracefs(&global_trace, NULL);
9980         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9981
9982         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9983                         &global_trace, &tracing_thresh_fops);
9984
9985         trace_create_file("README", TRACE_MODE_READ, NULL,
9986                         NULL, &tracing_readme_fops);
9987
9988         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9989                         NULL, &tracing_saved_cmdlines_fops);
9990
9991         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9992                           NULL, &tracing_saved_cmdlines_size_fops);
9993
9994         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9995                         NULL, &tracing_saved_tgids_fops);
9996
9997         trace_create_eval_file(NULL);
9998
9999 #ifdef CONFIG_MODULES
10000         register_module_notifier(&trace_module_nb);
10001 #endif
10002
10003 #ifdef CONFIG_DYNAMIC_FTRACE
10004         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10005                         NULL, &tracing_dyn_info_fops);
10006 #endif
10007
10008         create_trace_instances(NULL);
10009
10010         update_tracer_options(&global_trace);
10011 }
10012
10013 static __init int tracer_init_tracefs(void)
10014 {
10015         int ret;
10016
10017         trace_access_lock_init();
10018
10019         ret = tracing_init_dentry();
10020         if (ret)
10021                 return 0;
10022
10023         if (eval_map_wq) {
10024                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10025                 queue_work(eval_map_wq, &tracerfs_init_work);
10026         } else {
10027                 tracer_init_tracefs_work_func(NULL);
10028         }
10029
10030         rv_init_interface();
10031
10032         return 0;
10033 }
10034
10035 fs_initcall(tracer_init_tracefs);
10036
10037 static int trace_die_panic_handler(struct notifier_block *self,
10038                                 unsigned long ev, void *unused);
10039
10040 static struct notifier_block trace_panic_notifier = {
10041         .notifier_call = trace_die_panic_handler,
10042         .priority = INT_MAX - 1,
10043 };
10044
10045 static struct notifier_block trace_die_notifier = {
10046         .notifier_call = trace_die_panic_handler,
10047         .priority = INT_MAX - 1,
10048 };
10049
10050 /*
10051  * The idea is to execute the following die/panic callback early, in order
10052  * to avoid showing irrelevant information in the trace (like other panic
10053  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10054  * warnings get disabled (to prevent potential log flooding).
10055  */
10056 static int trace_die_panic_handler(struct notifier_block *self,
10057                                 unsigned long ev, void *unused)
10058 {
10059         if (!ftrace_dump_on_oops_enabled())
10060                 return NOTIFY_DONE;
10061
10062         /* The die notifier requires DIE_OOPS to trigger */
10063         if (self == &trace_die_notifier && ev != DIE_OOPS)
10064                 return NOTIFY_DONE;
10065
10066         ftrace_dump(DUMP_PARAM);
10067
10068         return NOTIFY_DONE;
10069 }
10070
10071 /*
10072  * printk is set to a max of 1024; we really don't need it that big.
10073  * Nothing should be printing 1000 characters anyway.
10074  */
10075 #define TRACE_MAX_PRINT         1000
10076
10077 /*
10078  * Define here KERN_TRACE so that we have one place to modify
10079  * it if we decide to change what log level the ftrace dump
10080  * should be at.
10081  */
10082 #define KERN_TRACE              KERN_EMERG
10083
10084 void
10085 trace_printk_seq(struct trace_seq *s)
10086 {
10087         /* Probably should print a warning here. */
10088         if (s->seq.len >= TRACE_MAX_PRINT)
10089                 s->seq.len = TRACE_MAX_PRINT;
10090
10091         /*
10092          * More paranoid code. Although the buffer size is set to
10093          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10094          * an extra layer of protection.
10095          */
10096         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10097                 s->seq.len = s->seq.size - 1;
10098
10099         /* Should already be zero terminated, but we are paranoid. */
10100         s->buffer[s->seq.len] = 0;
10101
10102         printk(KERN_TRACE "%s", s->buffer);
10103
10104         trace_seq_init(s);
10105 }
10106
10107 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10108 {
10109         iter->tr = tr;
10110         iter->trace = iter->tr->current_trace;
10111         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10112         iter->array_buffer = &tr->array_buffer;
10113
10114         if (iter->trace && iter->trace->open)
10115                 iter->trace->open(iter);
10116
10117         /* Annotate start of buffers if we had overruns */
10118         if (ring_buffer_overruns(iter->array_buffer->buffer))
10119                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10120
10121         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10122         if (trace_clocks[iter->tr->clock_id].in_ns)
10123                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10124
10125         /* Cannot use kmalloc for iter.temp and iter.fmt */
10126         iter->temp = static_temp_buf;
10127         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10128         iter->fmt = static_fmt_buf;
10129         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10130 }
10131
10132 void trace_init_global_iter(struct trace_iterator *iter)
10133 {
10134         trace_init_iter(iter, &global_trace);
10135 }
10136
10137 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10138 {
10139         /* use static because iter can be a bit big for the stack */
10140         static struct trace_iterator iter;
10141         unsigned int old_userobj;
10142         unsigned long flags;
10143         int cnt = 0, cpu;
10144
10145         /*
10146          * Always turn off tracing when we dump.
10147          * We don't need to show trace output of what happens
10148          * between multiple crashes.
10149          *
10150          * If the user does a sysrq-z, then they can re-enable
10151          * tracing with echo 1 > tracing_on.
10152          */
10153         tracer_tracing_off(tr);
10154
10155         local_irq_save(flags);
10156
10157         /* Simulate the iterator */
10158         trace_init_iter(&iter, tr);
10159
10160         for_each_tracing_cpu(cpu) {
10161                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10162         }
10163
10164         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10165
10166         /* don't look at user memory in panic mode */
10167         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10168
10169         if (dump_mode == DUMP_ORIG)
10170                 iter.cpu_file = raw_smp_processor_id();
10171         else
10172                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10173
10174         if (tr == &global_trace)
10175                 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10176         else
10177                 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10178
10179         /* Did function tracer already get disabled? */
10180         if (ftrace_is_dead()) {
10181                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10182                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10183         }
10184
10185         /*
10186          * We need to stop all tracing on all CPUs to read
10187          * the next buffer. This is a bit expensive, but is
10188          * not done often. We read everything we can,
10189          * and then release the locks again.
10190          */
10191
10192         while (!trace_empty(&iter)) {
10193
10194                 if (!cnt)
10195                         printk(KERN_TRACE "---------------------------------\n");
10196
10197                 cnt++;
10198
10199                 trace_iterator_reset(&iter);
10200                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10201
10202                 if (trace_find_next_entry_inc(&iter) != NULL) {
10203                         int ret;
10204
10205                         ret = print_trace_line(&iter);
10206                         if (ret != TRACE_TYPE_NO_CONSUME)
10207                                 trace_consume(&iter);
10208                 }
10209                 touch_nmi_watchdog();
10210
10211                 trace_printk_seq(&iter.seq);
10212         }
10213
10214         if (!cnt)
10215                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10216         else
10217                 printk(KERN_TRACE "---------------------------------\n");
10218
10219         tr->trace_flags |= old_userobj;
10220
10221         for_each_tracing_cpu(cpu) {
10222                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10223         }
10224         local_irq_restore(flags);
10225 }
10226
10227 static void ftrace_dump_by_param(void)
10228 {
10229         bool first_param = true;
10230         char dump_param[MAX_TRACER_SIZE];
10231         char *buf, *token, *inst_name;
10232         struct trace_array *tr;
10233
10234         strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10235         buf = dump_param;
10236
10237         while ((token = strsep(&buf, ",")) != NULL) {
10238                 if (first_param) {
10239                         first_param = false;
10240                         if (!strcmp("0", token))
10241                                 continue;
10242                         else if (!strcmp("1", token)) {
10243                                 ftrace_dump_one(&global_trace, DUMP_ALL);
10244                                 continue;
10245                         }
10246                         else if (!strcmp("2", token) ||
10247                           !strcmp("orig_cpu", token)) {
10248                                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10249                                 continue;
10250                         }
10251                 }
10252
10253                 inst_name = strsep(&token, "=");
10254                 tr = trace_array_find(inst_name);
10255                 if (!tr) {
10256                         printk(KERN_TRACE "Instance %s not found\n", inst_name);
10257                         continue;
10258                 }
10259
10260                 if (token && (!strcmp("2", token) ||
10261                           !strcmp("orig_cpu", token)))
10262                         ftrace_dump_one(tr, DUMP_ORIG);
10263                 else
10264                         ftrace_dump_one(tr, DUMP_ALL);
10265         }
10266 }
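/*
 * Accepted format, as implied by the parsing above (names and values are
 * illustrative): ftrace_dump_on_oops takes a comma separated list.  A
 * leading "0", "1", "2" or "orig_cpu" applies to the global buffer; the
 * remaining tokens name instances, optionally suffixed with "=2" or
 * "=orig_cpu" to dump only the CPU that oopsed:
 *
 *	ftrace_dump_on_oops=1,foo,bar=orig_cpu
 *
 * dumps all CPUs of the global buffer and of instance "foo", but only the
 * originating CPU of instance "bar".
 */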
10267
10268 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10269 {
10270         static atomic_t dump_running;
10271
10272         /* Only allow one dump user at a time. */
10273         if (atomic_inc_return(&dump_running) != 1) {
10274                 atomic_dec(&dump_running);
10275                 return;
10276         }
10277
10278         switch (oops_dump_mode) {
10279         case DUMP_ALL:
10280                 ftrace_dump_one(&global_trace, DUMP_ALL);
10281                 break;
10282         case DUMP_ORIG:
10283                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10284                 break;
10285         case DUMP_PARAM:
10286                 ftrace_dump_by_param();
10287                 break;
10288         case DUMP_NONE:
10289                 break;
10290         default:
10291                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10292                 ftrace_dump_one(&global_trace, DUMP_ALL);
10293         }
10294
10295         atomic_dec(&dump_running);
10296 }
10297 EXPORT_SYMBOL_GPL(ftrace_dump);
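/*
 * Usage sketch (the surrounding driver state and the wedged check are
 * hypothetical): since ftrace_dump() is exported, kernel code can flush
 * the trace buffers to the console when it hits an unrecoverable state:
 *
 *	if (WARN_ON(my_device_is_wedged(dev)))
 *		ftrace_dump(DUMP_ORIG);	// dump only the current CPU
 *
 * Note that a dump also turns tracing off (see ftrace_dump_one() above),
 * so this is meant for one-shot post-mortem use.
 */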
10298
10299 #define WRITE_BUFSIZE  4096
10300
10301 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10302                                 size_t count, loff_t *ppos,
10303                                 int (*createfn)(const char *))
10304 {
10305         char *kbuf, *buf, *tmp;
10306         int ret = 0;
10307         size_t done = 0;
10308         size_t size;
10309
10310         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10311         if (!kbuf)
10312                 return -ENOMEM;
10313
10314         while (done < count) {
10315                 size = count - done;
10316
10317                 if (size >= WRITE_BUFSIZE)
10318                         size = WRITE_BUFSIZE - 1;
10319
10320                 if (copy_from_user(kbuf, buffer + done, size)) {
10321                         ret = -EFAULT;
10322                         goto out;
10323                 }
10324                 kbuf[size] = '\0';
10325                 buf = kbuf;
10326                 do {
10327                         tmp = strchr(buf, '\n');
10328                         if (tmp) {
10329                                 *tmp = '\0';
10330                                 size = tmp - buf + 1;
10331                         } else {
10332                                 size = strlen(buf);
10333                                 if (done + size < count) {
10334                                         if (buf != kbuf)
10335                                                 break;
10336                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10337                                         pr_warn("Line length is too long: Should be less than %d\n",
10338                                                 WRITE_BUFSIZE - 2);
10339                                         ret = -EINVAL;
10340                                         goto out;
10341                                 }
10342                         }
10343                         done += size;
10344
10345                         /* Remove comments */
10346                         tmp = strchr(buf, '#');
10347
10348                         if (tmp)
10349                                 *tmp = '\0';
10350
10351                         ret = createfn(buf);
10352                         if (ret)
10353                                 goto out;
10354                         buf += size;
10355
10356                 } while (done < count);
10357         }
10358         ret = done;
10359
10360 out:
10361         kfree(kbuf);
10362
10363         return ret;
10364 }
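/*
 * For illustration (the probe definition is just an example of a
 * dynamic-event line; kprobe_events is one caller of this helper): the
 * buffer is split on newlines, anything after '#' is dropped, and
 * createfn() runs once per remaining line.  A write such as:
 *
 *	echo 'p:myprobe do_sys_openat2 dfd=$arg1  # watch openat2' \
 *		> /sys/kernel/tracing/kprobe_events
 *
 * therefore reaches createfn() as "p:myprobe do_sys_openat2 dfd=$arg1"
 * (plus the trailing spaces that preceded the stripped comment).
 */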
10365
10366 #ifdef CONFIG_TRACER_MAX_TRACE
10367 __init static bool tr_needs_alloc_snapshot(const char *name)
10368 {
10369         char *test;
10370         int len = strlen(name);
10371         bool ret;
10372
10373         if (!boot_snapshot_index)
10374                 return false;
10375
10376         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10377             boot_snapshot_info[len] == '\t')
10378                 return true;
10379
10380         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10381         if (!test)
10382                 return false;
10383
10384         sprintf(test, "\t%s\t", name);
10385         ret = strstr(boot_snapshot_info, test) != NULL;
10386         kfree(test);
10387         return ret;
10388 }
10389
10390 __init static void do_allocate_snapshot(const char *name)
10391 {
10392         if (!tr_needs_alloc_snapshot(name))
10393                 return;
10394
10395         /*
10396          * When allocate_snapshot is set, the next call to
10397          * allocate_trace_buffers() (called by trace_array_get_by_name())
10398          * will allocate the snapshot buffer. That will also clear
10399          * this flag.
10400          */
10401         allocate_snapshot = true;
10402 }
10403 #else
10404 static inline void do_allocate_snapshot(const char *name) { }
10405 #endif
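/*
 * For illustration (this assumes the snapshot boot parameters accept an
 * instance list, which is what the tab separated boot_snapshot_info
 * checked above implies):
 *
 *	trace_instance=foo ftrace_boot_snapshot=foo
 *
 * would make do_allocate_snapshot("foo") set allocate_snapshot, so the
 * following allocate_trace_buffers() call sizes foo's max_buffer like its
 * main buffer instead of leaving it as a minimal placeholder.
 */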
10406
10407 __init static void enable_instances(void)
10408 {
10409         struct trace_array *tr;
10410         char *curr_str;
10411         char *name;
10412         char *str;
10413         char *tok;
10414
10415         /* A tab is always appended */
10416         boot_instance_info[boot_instance_index - 1] = '\0';
10417         str = boot_instance_info;
10418
10419         while ((curr_str = strsep(&str, "\t"))) {
10420                 phys_addr_t start = 0;
10421                 phys_addr_t size = 0;
10422                 unsigned long addr = 0;
10423                 bool traceprintk = false;
10424                 bool traceoff = false;
10425                 char *flag_delim;
10426                 char *addr_delim;
10427
10428                 tok = strsep(&curr_str, ",");
10429
10430                 flag_delim = strchr(tok, '^');
10431                 addr_delim = strchr(tok, '@');
10432
10433                 if (addr_delim)
10434                         *addr_delim++ = '\0';
10435
10436                 if (flag_delim)
10437                         *flag_delim++ = '\0';
10438
10439                 name = tok;
10440
10441                 if (flag_delim) {
10442                         char *flag;
10443
10444                         while ((flag = strsep(&flag_delim, "^"))) {
10445                                 if (strcmp(flag, "traceoff") == 0) {
10446                                         traceoff = true;
10447                                 } else if ((strcmp(flag, "printk") == 0) ||
10448                                            (strcmp(flag, "traceprintk") == 0) ||
10449                                            (strcmp(flag, "trace_printk") == 0)) {
10450                                         traceprintk = true;
10451                                 } else {
10452                                         pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10453                                                 flag, name);
10454                                 }
10455                         }
10456                 }
10457
10458                 tok = addr_delim;
10459                 if (tok && isdigit(*tok)) {
10460                         start = memparse(tok, &tok);
10461                         if (!start) {
10462                                 pr_warn("Tracing: Invalid boot instance address for %s\n",
10463                                         name);
10464                                 continue;
10465                         }
10466                         if (*tok != ':') {
10467                                 pr_warn("Tracing: No size specified for instance %s\n", name);
10468                                 continue;
10469                         }
10470                         tok++;
10471                         size = memparse(tok, &tok);
10472                         if (!size) {
10473                                 pr_warn("Tracing: Invalid boot instance size for %s\n",
10474                                         name);
10475                                 continue;
10476                         }
10477                 } else if (tok) {
10478                         if (!reserve_mem_find_by_name(tok, &start, &size)) {
10479                                 start = 0;
10480                                 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10481                                 continue;
10482                         }
10483                 }
10484
10485                 if (start) {
10486                         addr = map_pages(start, size);
10487                         if (addr) {
10488                                 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10489                                         name, &start, (unsigned long)size);
10490                         } else {
10491                                 pr_warn("Tracing: Failed to map boot instance %s\n", name);
10492                                 continue;
10493                         }
10494                 } else {
10495                         /* Only non-mapped buffers have snapshot buffers */
10496                         if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10497                                 do_allocate_snapshot(name);
10498                 }
10499
10500                 tr = trace_array_create_systems(name, NULL, addr, size);
10501                 if (IS_ERR(tr)) {
10502                         pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10503                         continue;
10504                 }
10505
10506                 if (traceoff)
10507                         tracer_tracing_off(tr);
10508
10509                 if (traceprintk)
10510                         update_printk_trace(tr);
10511
10512                 /*
10513                  * If start is set, then this is a mapped buffer, and
10514                  * cannot be deleted by user space, so keep the reference
10515                  * to it.
10516                  */
10517                 if (start) {
10518                         tr->flags |= TRACE_ARRAY_FL_BOOT;
10519                         tr->ref++;
10520                 }
10521
10522                 while ((tok = strsep(&curr_str, ","))) {
10523                         early_enable_events(tr, tok, true);
10524                 }
10525         }
10526 }
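/*
 * Summary of the syntax parsed above (all names and sizes are example
 * values):
 *
 *	trace_instance=name[^flag[^flag]][@where][,event,...]
 *
 * where a flag is "traceoff" or "traceprintk"/"printk"/"trace_printk",
 * and "where" is either a raw "start:size" physical range or the name of
 * a region created with the "reserve_mem=" parameter.  For instance:
 *
 *	reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff@trace,sched:sched_switch
 *
 * creates a memory-mapped instance "boot_map" backed by the reserved
 * region, starts it disabled, and enables the sched_switch event in it.
 */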
10527
10528 __init static int tracer_alloc_buffers(void)
10529 {
10530         int ring_buf_size;
10531         int ret = -ENOMEM;
10532
10533
10534         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10535                 pr_warn("Tracing disabled due to lockdown\n");
10536                 return -EPERM;
10537         }
10538
10539         /*
10540          * Make sure we don't accidentally add more trace options
10541          * than we have bits for.
10542          */
10543         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10544
10545         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10546                 goto out;
10547
10548         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10549                 goto out_free_buffer_mask;
10550
10551         /* Only allocate trace_printk buffers if a trace_printk exists */
10552         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10553                 /* Must be called before global_trace.buffer is allocated */
10554                 trace_printk_init_buffers();
10555
10556         /* To save memory, keep the ring buffer size at its minimum */
10557         if (global_trace.ring_buffer_expanded)
10558                 ring_buf_size = trace_buf_size;
10559         else
10560                 ring_buf_size = 1;
10561
10562         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10563         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10564
10565         raw_spin_lock_init(&global_trace.start_lock);
10566
10567         /*
10568          * The prepare callback allocates some memory for the ring buffer. We
10569          * don't free the buffer if the CPU goes down. If we were to free
10570          * the buffer, then the user would lose any trace that was in the
10571          * buffer. The memory will be removed once the "instance" is removed.
10572          */
10573         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10574                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10575                                       NULL);
10576         if (ret < 0)
10577                 goto out_free_cpumask;
10578         /* Used for event triggers */
10579         ret = -ENOMEM;
10580         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10581         if (!temp_buffer)
10582                 goto out_rm_hp_state;
10583
10584         if (trace_create_savedcmd() < 0)
10585                 goto out_free_temp_buffer;
10586
10587         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10588                 goto out_free_savedcmd;
10589
10590         /* TODO: make the number of buffers hot pluggable with CPUS */
10591         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10592                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10593                 goto out_free_pipe_cpumask;
10594         }
10595         if (global_trace.buffer_disabled)
10596                 tracing_off();
10597
10598         if (trace_boot_clock) {
10599                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10600                 if (ret < 0)
10601                         pr_warn("Trace clock %s not defined, going back to default\n",
10602                                 trace_boot_clock);
10603         }
10604
10605         /*
10606          * register_tracer() might reference current_trace, so it
10607          * needs to be set before we register anything. This is
10608          * just a bootstrap of current_trace anyway.
10609          */
10610         global_trace.current_trace = &nop_trace;
10611
10612         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10613 #ifdef CONFIG_TRACER_MAX_TRACE
10614         spin_lock_init(&global_trace.snapshot_trigger_lock);
10615 #endif
10616         ftrace_init_global_array_ops(&global_trace);
10617
10618         init_trace_flags_index(&global_trace);
10619
10620         register_tracer(&nop_trace);
10621
10622         /* Function tracing may start here (via kernel command line) */
10623         init_function_trace();
10624
10625         /* All seems OK, enable tracing */
10626         tracing_disabled = 0;
10627
10628         atomic_notifier_chain_register(&panic_notifier_list,
10629                                        &trace_panic_notifier);
10630
10631         register_die_notifier(&trace_die_notifier);
10632
10633         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10634
10635         INIT_LIST_HEAD(&global_trace.systems);
10636         INIT_LIST_HEAD(&global_trace.events);
10637         INIT_LIST_HEAD(&global_trace.hist_vars);
10638         INIT_LIST_HEAD(&global_trace.err_log);
10639         list_add(&global_trace.list, &ftrace_trace_arrays);
10640
10641         apply_trace_boot_options();
10642
10643         register_snapshot_cmd();
10644
10645         return 0;
10646
10647 out_free_pipe_cpumask:
10648         free_cpumask_var(global_trace.pipe_cpumask);
10649 out_free_savedcmd:
10650         trace_free_saved_cmdlines_buffer();
10651 out_free_temp_buffer:
10652         ring_buffer_free(temp_buffer);
10653 out_rm_hp_state:
10654         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10655 out_free_cpumask:
10656         free_cpumask_var(global_trace.tracing_cpumask);
10657 out_free_buffer_mask:
10658         free_cpumask_var(tracing_buffer_mask);
10659 out:
10660         return ret;
10661 }
10662
10663 void __init ftrace_boot_snapshot(void)
10664 {
10665 #ifdef CONFIG_TRACER_MAX_TRACE
10666         struct trace_array *tr;
10667
10668         if (!snapshot_at_boot)
10669                 return;
10670
10671         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10672                 if (!tr->allocated_snapshot)
10673                         continue;
10674
10675                 tracing_snapshot_instance(tr);
10676                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10677         }
10678 #endif
10679 }
10680
10681 void __init early_trace_init(void)
10682 {
10683         if (tracepoint_printk) {
10684                 tracepoint_print_iter =
10685                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10686                 if (MEM_FAIL(!tracepoint_print_iter,
10687                              "Failed to allocate trace iterator\n"))
10688                         tracepoint_printk = 0;
10689                 else
10690                         static_key_enable(&tracepoint_printk_key.key);
10691         }
10692         tracer_alloc_buffers();
10693
10694         init_events();
10695 }
10696
10697 void __init trace_init(void)
10698 {
10699         trace_event_init();
10700
10701         if (boot_instance_index)
10702                 enable_instances();
10703 }
10704
10705 __init static void clear_boot_tracer(void)
10706 {
10707         /*
10708          * The buffer holding the default bootup tracer name is in an init
10709          * section. This function is called at late init. If we did not
10710          * find the boot tracer, then clear it out, to prevent
10711          * later registration from accessing the buffer that is
10712          * about to be freed.
10713          */
10714         if (!default_bootup_tracer)
10715                 return;
10716
10717         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10718                default_bootup_tracer);
10719         default_bootup_tracer = NULL;
10720 }
10721
10722 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10723 __init static void tracing_set_default_clock(void)
10724 {
10725         /* sched_clock_stable() is determined in late_initcall */
10726         if (!trace_boot_clock && !sched_clock_stable()) {
10727                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10728                         pr_warn("Can not set tracing clock due to lockdown\n");
10729                         return;
10730                 }
10731
10732                 printk(KERN_WARNING
10733                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10734                        "If you want to keep using the local clock, then add:\n"
10735                        "  \"trace_clock=local\"\n"
10736                        "on the kernel command line\n");
10737                 tracing_set_clock(&global_trace, "global");
10738         }
10739 }
10740 #else
10741 static inline void tracing_set_default_clock(void) { }
10742 #endif
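/*
 * For illustration (clock list abridged and configuration dependent;
 * assumes tracefs at /sys/kernel/tracing): the active clock can also be
 * inspected and changed at run time:
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot ...
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * or pinned at boot with "trace_clock=local", as the warning above
 * suggests.
 */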
10743
10744 __init static int late_trace_init(void)
10745 {
10746         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10747                 static_key_disable(&tracepoint_printk_key.key);
10748                 tracepoint_printk = 0;
10749         }
10750
10751         tracing_set_default_clock();
10752         clear_boot_tracer();
10753         return 0;
10754 }
10755
10756 late_initcall_sync(late_trace_init);