1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <[email protected]>
6  * Copyright (C) 2008 Ingo Molnar <[email protected]>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <[email protected]>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring-buffer, such as trace_printk, could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
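/*
 * Example kernel command-line usage for the parser above (an illustrative
 * sketch based only on the strings accepted by set_ftrace_dump_on_oops();
 * the rest of the command line is hypothetical):
 *
 *    ftrace_dump_on_oops              same as "=1", dump the buffers of all CPUs
 *    ftrace_dump_on_oops=1            dump the buffers of all CPUs
 *    ftrace_dump_on_oops=orig_cpu     dump only the CPU that triggered the oops
 *    ftrace_dump_on_oops=2            same as "orig_cpu"
 */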
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
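/*
 * Putting the boot parameters above together: a command line that starts
 * the function tracer at boot, selects the global trace clock, pipes
 * tracepoints to printk and stops doing so once boot finishes could look
 * like this (an illustrative sketch; the particular tracer, option and
 * clock names are only examples):
 *
 *    ftrace=function trace_options=sym-addr trace_clock=global \
 *    tp_printk tp_printk_stop_on_boot traceoff_on_warning
 */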
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
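/*
 * A minimal usage sketch for the export API above. The callback and
 * variable names are hypothetical, and the ->write() prototype is assumed
 * to match the one invoked by trace_process_export() (see linux/trace.h):
 *
 *    static void my_export_write(struct trace_export *export,
 *                                const void *entry, unsigned int size)
 *    {
 *            (forward @size bytes of the entry to an external sink)
 *    }
 *
 *    static struct trace_export my_export = {
 *            .write = my_export_write,
 *            .flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *    };
 *
 *    register_ftrace_export(&my_export);
 *    ...
 *    unregister_ftrace_export(&my_export);
 */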
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr: pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
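/*
 * Typical usage sketch for the reference counting above (hedged; the
 * instance name is only an example, and trace_array_get_by_name() is
 * defined later in this file):
 *
 *    struct trace_array *tr;
 *
 *    tr = trace_array_get_by_name("my_instance");
 *    if (!tr)
 *            return -ENODEV;
 *    ... use @tr, e.g. with trace_array_printk() ...
 *    trace_array_put(tr);
 */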
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
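/*
 * Concrete example of the two lists checked above, using the tracefs
 * files that feed them (an illustrative sketch; the paths assume tracefs
 * is mounted at /sys/kernel/tracing and the pids are arbitrary):
 *
 *    echo 1234 > /sys/kernel/tracing/set_event_pid          only trace pid 1234
 *    echo 5678 > /sys/kernel/tracing/set_event_notrace_pid  never trace pid 5678
 *
 * With both lists set, a task is ignored if its pid is in the "notrace"
 * list, or if the "pid" list is non-empty and does not contain its pid.
 */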
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
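/*
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to be
 * plugged into a seq_operations structure for a pid-list file. A hedged
 * sketch (the wrapper callbacks and the way the pid_list is looked up are
 * hypothetical):
 *
 *    static const struct seq_operations example_pid_sops = {
 *            .start  = example_pid_start,    (wraps trace_pid_start())
 *            .next   = example_pid_next,     (wraps trace_pid_next())
 *            .stop   = example_pid_stop,
 *            .show   = trace_pid_show,
 *    };
 */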
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new array. The write is an all or nothing
698          * operation: a new array is built whenever the user adds new
699          * pids. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
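/*
 * Sketch of how callers elsewhere in the tracing code typically use
 * trace_pid_write() (hedged; locking details and the exact field names
 * vary from caller to caller):
 *
 *    filtered_pids = rcu_dereference_protected(tr->filtered_pids, ...);
 *    ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *    if (ret < 0)
 *            return ret;
 *    rcu_assign_pointer(tr->filtered_pids, pid_list);
 *    if (filtered_pids) {
 *            synchronize_rcu();
 *            trace_pid_list_free(filtered_pids);
 *    }
 */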
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which is meant for fast paths such as
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If a dump on oops happens, it is much appreciated not to have
813  * to wait for all that output. In any case, this can be configured
814  * at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * serialize the access of the ring buffer
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (returned by ring_buffer_peek() ..etc)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes
836  * to consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu
844  *      ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
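/*
 * Usage pattern for the access primitives above, as used by the buffer
 * readers later in this file (an illustrative sketch):
 *
 *    trace_access_lock(cpu_file);        (a cpu id or RING_BUFFER_ALL_CPUS)
 *    ... consume events, e.g. via ring_buffer_consume() ...
 *    trace_access_unlock(cpu_file);
 */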
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned int trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff), that just want to
965          * know if the ring buffer has been disabled, but it can handle
966          * races of where it gets disabled but we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
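/*
 * __trace_puts() is normally reached through the trace_puts() macro
 * (see linux/kernel.h), which supplies the caller's ip automatically.
 * A minimal usage sketch from kernel code:
 *
 *    trace_puts("reached the slow path\n");
 *
 * For formatted output, trace_printk() is the usual choice instead.
 */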
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot either with
1145  * tracing_snapshot_alloc(), or manually with:
1146  * echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing.
1149  * Basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         local_irq_disable();
1197         arch_spin_lock(&tr->max_lock);
1198
1199         if (tr->cond_snapshot)
1200                 cond_data = tr->cond_snapshot->cond_data;
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210                                         struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215         int ret;
1216
1217         if (!tr->allocated_snapshot) {
1218
1219                 /* allocate spare buffer */
1220                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222                 if (ret < 0)
1223                         return ret;
1224
1225                 tr->allocated_snapshot = true;
1226         }
1227
1228         return 0;
1229 }
1230
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233         /*
1234          * We don't free the ring buffer. Instead, we resize it because
1235          * the max_tr ring buffer has some state (e.g. ring->clock) and
1236          * we want to preserve it.
1237          */
1238         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239         set_buffer_entries(&tr->max_buffer, 1);
1240         tracing_reset_online_cpus(&tr->max_buffer);
1241         tr->allocated_snapshot = false;
1242 }
1243
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256         struct trace_array *tr = &global_trace;
1257         int ret;
1258
1259         ret = tracing_alloc_snapshot_instance(tr);
1260         WARN_ON(ret < 0);
1261
1262         return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279         int ret;
1280
1281         ret = tracing_alloc_snapshot();
1282         if (ret < 0)
1283                 return;
1284
1285         tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
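/*
 * Putting the snapshot calls above together (a hedged sketch; the trigger
 * condition is hypothetical):
 *
 *    (once, from a context that may sleep)
 *    tracing_alloc_snapshot();
 *
 *    (later, at the point of interest, possibly in atomic context)
 *    if (something_went_wrong)
 *            tracing_snapshot();
 *
 * The captured buffer can then be read from the tracefs "snapshot" file.
 */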
1288
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:         The tracing instance
1292  * @cond_data:  User data to associate with the snapshot
1293  * @update:     Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303                                  cond_update_fn_t update)
1304 {
1305         struct cond_snapshot *cond_snapshot;
1306         int ret = 0;
1307
1308         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309         if (!cond_snapshot)
1310                 return -ENOMEM;
1311
1312         cond_snapshot->cond_data = cond_data;
1313         cond_snapshot->update = update;
1314
1315         mutex_lock(&trace_types_lock);
1316
1317         ret = tracing_alloc_snapshot_instance(tr);
1318         if (ret)
1319                 goto fail_unlock;
1320
1321         if (tr->current_trace->use_max_tr) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         /*
1327          * The cond_snapshot can only change to NULL without the
1328          * trace_types_lock. We don't care if we race with it going
1329          * to NULL, but we want to make sure that it's not set to
1330          * something other than NULL when we get here, which we can
1331          * do safely with only holding the trace_types_lock and not
1332          * having to take the max_lock.
1333          */
1334         if (tr->cond_snapshot) {
1335                 ret = -EBUSY;
1336                 goto fail_unlock;
1337         }
1338
1339         local_irq_disable();
1340         arch_spin_lock(&tr->max_lock);
1341         tr->cond_snapshot = cond_snapshot;
1342         arch_spin_unlock(&tr->max_lock);
1343         local_irq_enable();
1344
1345         mutex_unlock(&trace_types_lock);
1346
1347         return ret;
1348
1349  fail_unlock:
1350         mutex_unlock(&trace_types_lock);
1351         kfree(cond_snapshot);
1352         return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
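/*
 * A hedged usage sketch for the conditional snapshot API above (the update
 * callback, its decision logic and the cond_data are hypothetical):
 *
 *    static bool my_update(struct trace_array *tr, void *cond_data)
 *    {
 *            return cond_data != NULL;       (take the snapshot only if true)
 *    }
 *
 *    tracing_snapshot_cond_enable(tr, my_data, my_update);
 *    ...
 *    tracing_snapshot_cond(tr, my_data);     (my_update() decides the swap)
 *    ...
 *    tracing_snapshot_cond_disable(tr);
 */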
1355
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:         The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368         int ret = 0;
1369
1370         local_irq_disable();
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381         local_irq_enable();
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr: the trace array to know if the ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
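/*
 * Typical debugging pattern for the helpers above (illustrative sketch;
 * the condition is hypothetical):
 *
 *    tracing_on();
 *    ... code being analyzed ...
 *    if (suspicious_condition)
 *            tracing_off();          (freeze the ring buffer contents)
 *
 *    if (!tracing_is_on())
 *            pr_info("tracing stopped, inspect the tracefs 'trace' file\n");
 */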
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
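/*
 * Example command-line usage for the two parameters above (illustrative;
 * memparse() accepts K/M/G suffixes, and the threshold value is given in
 * microseconds since it is multiplied by 1000 before being stored):
 *
 *    trace_buf_size=16M tracing_thresh=200
 */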
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * The parser is not finished with the last write,
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
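
/*
 * Illustrative usage only (handle_token() is a made-up helper): a tracefs
 * ->write() handler typically consumes one space-separated token per call:
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser)) {
 *		err = handle_token(parser.buffer);
 *		trace_parser_clear(&parser);
 *	}
 *
 * trace_parser_loaded(), trace_parser_cont() and trace_parser_clear() are
 * defined in kernel/trace/trace.h.
 */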
1676
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696
1697 #ifdef LATENCY_FS_NOTIFY
1698
1699 static struct workqueue_struct *fsnotify_wq;
1700
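/*
 * Workqueue callback: runs in process context, where it is safe to call
 * fsnotify_inode() to tell watchers that tracing_max_latency changed.
 */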
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703         struct trace_array *tr = container_of(work, struct trace_array,
1704                                               fsnotify_work);
1705         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710         struct trace_array *tr = container_of(iwork, struct trace_array,
1711                                               fsnotify_irqwork);
1712         queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716                                      struct dentry *d_tracer)
1717 {
1718         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720         tr->d_max_latency = trace_create_file("tracing_max_latency",
1721                                               TRACE_MODE_WRITE,
1722                                               d_tracer, &tr->max_latency,
1723                                               &tracing_max_lat_fops);
1724 }
1725
1726 __init static int latency_fsnotify_init(void)
1727 {
1728         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1730         if (!fsnotify_wq) {
1731                 pr_err("Unable to allocate tr_max_lat_wq\n");
1732                 return -ENOMEM;
1733         }
1734         return 0;
1735 }
1736
1737 late_initcall_sync(latency_fsnotify_init);
1738
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741         if (!fsnotify_wq)
1742                 return;
1743         /*
1744          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745          * possible that we are called from __schedule() or do_idle(), which
1746          * could cause a deadlock.
1747          */
1748         irq_work_queue(&tr->fsnotify_irqwork);
1749 }
1750
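/*
 * Without LATENCY_FS_NOTIFY, the latency tracers still get a
 * tracing_max_latency file, but updates to max_latency are not signalled
 * to fsnotify watchers; with no latency tracer configured at all, no file
 * is created.
 */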
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1752         || defined(CONFIG_OSNOISE_TRACER)
1753
1754 #define trace_create_maxlat_file(tr, d_tracer)                          \
1755         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1756                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1760 #endif
1761
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771         struct array_buffer *trace_buf = &tr->array_buffer;
1772         struct array_buffer *max_buf = &tr->max_buffer;
1773         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775
1776         max_buf->cpu = cpu;
1777         max_buf->time_start = data->preempt_timestamp;
1778
1779         max_data->saved_latency = tr->max_latency;
1780         max_data->critical_start = data->critical_start;
1781         max_data->critical_end = data->critical_end;
1782
1783         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784         max_data->pid = tsk->pid;
1785         /*
1786          * If tsk == current, then use current_uid(), as that does not use
1787          * RCU. The irq tracer can be called out of RCU scope.
1788          */
1789         if (tsk == current)
1790                 max_data->uid = current_uid();
1791         else
1792                 max_data->uid = task_uid(tsk);
1793
1794         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795         max_data->policy = tsk->policy;
1796         max_data->rt_priority = tsk->rt_priority;
1797
1798         /* record this task's comm */
1799         tracing_record_cmdline(tsk);
1800         latency_fsnotify(tr);
1801 }
1802
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815               void *cond_data)
1816 {
1817         if (tr->stop_count)
1818                 return;
1819
1820         WARN_ON_ONCE(!irqs_disabled());
1821
1822         if (!tr->allocated_snapshot) {
1823                 /* Only the nop tracer should hit this when disabling */
1824                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825                 return;
1826         }
1827
1828         arch_spin_lock(&tr->max_lock);
1829
1830         /* Inherit the recordable setting from array_buffer */
1831         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832                 ring_buffer_record_on(tr->max_buffer.buffer);
1833         else
1834                 ring_buffer_record_off(tr->max_buffer.buffer);
1835
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838                 goto out_unlock;
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844  out_unlock:
1845         arch_spin_unlock(&tr->max_lock);
1846 }
1847
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859         int ret;
1860
1861         if (tr->stop_count)
1862                 return;
1863
1864         WARN_ON_ONCE(!irqs_disabled());
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874
1875         if (ret == -EBUSY) {
1876                 /*
1877                  * We failed to swap the buffer due to a commit taking
1878                  * place on this CPU. We fail to record, but we reset
1879                  * the max trace buffer (no one writes directly to it)
1880                  * and flag that it failed.
1881                  */
1882                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883                         "Failed to swap buffers due to commit in progress\n");
1884         }
1885
1886         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888         __update_max_tr(tr, tsk, cpu);
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
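/*
 * Block a trace_pipe reader until the per-CPU ring buffer has data to
 * consume; iterators with their own static view of the buffer never wait.
 */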
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static, they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
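/*
 * Remember a tracer whose selftest must be postponed until
 * init_trace_selftests() sets selftests_can_run.
 */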
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftest yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017                  * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* disable other selftests, since running this tracer will break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
2146
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus(void)
2184 {
2185         struct trace_array *tr;
2186
2187         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2188                 if (!tr->clear_trace)
2189                         continue;
2190                 tr->clear_trace = false;
2191                 tracing_reset_online_cpus(&tr->array_buffer);
2192 #ifdef CONFIG_TRACER_MAX_TRACE
2193                 tracing_reset_online_cpus(&tr->max_buffer);
2194 #endif
2195         }
2196 }
2197
2198 /*
2199  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2200  * is the tgid last observed corresponding to pid=i.
2201  */
2202 static int *tgid_map;
2203
2204 /* The maximum valid index into tgid_map. */
2205 static size_t tgid_map_max;
2206
2207 #define SAVED_CMDLINES_DEFAULT 128
2208 #define NO_CMDLINE_MAP UINT_MAX
2209 /*
2210  * Preemption must be disabled before acquiring trace_cmdline_lock.
2211  * The various trace_arrays' max_lock must be acquired in a context
2212  * where interrupts are disabled.
2213  */
2214 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
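/*
 * Cache of recently recorded task comms. A pid is hashed with
 * pid & (PID_MAX_DEFAULT - 1); map_pid_to_cmdline maps that hash to a
 * slot in saved_cmdlines, and map_cmdline_to_pid records which pid owns
 * the slot so a stale mapping can be detected on lookup.
 */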
2215 struct saved_cmdlines_buffer {
2216         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2217         unsigned *map_cmdline_to_pid;
2218         unsigned cmdline_num;
2219         int cmdline_idx;
2220         char *saved_cmdlines;
2221 };
2222 static struct saved_cmdlines_buffer *savedcmd;
2223
2224 static inline char *get_saved_cmdlines(int idx)
2225 {
2226         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2227 }
2228
2229 static inline void set_cmdline(int idx, const char *cmdline)
2230 {
2231         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2232 }
2233
2234 static int allocate_cmdlines_buffer(unsigned int val,
2235                                     struct saved_cmdlines_buffer *s)
2236 {
2237         s->map_cmdline_to_pid = kmalloc_array(val,
2238                                               sizeof(*s->map_cmdline_to_pid),
2239                                               GFP_KERNEL);
2240         if (!s->map_cmdline_to_pid)
2241                 return -ENOMEM;
2242
2243         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2244         if (!s->saved_cmdlines) {
2245                 kfree(s->map_cmdline_to_pid);
2246                 return -ENOMEM;
2247         }
2248
2249         s->cmdline_idx = 0;
2250         s->cmdline_num = val;
2251         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2252                sizeof(s->map_pid_to_cmdline));
2253         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2254                val * sizeof(*s->map_cmdline_to_pid));
2255
2256         return 0;
2257 }
2258
2259 static int trace_create_savedcmd(void)
2260 {
2261         int ret;
2262
2263         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2264         if (!savedcmd)
2265                 return -ENOMEM;
2266
2267         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2268         if (ret < 0) {
2269                 kfree(savedcmd);
2270                 savedcmd = NULL;
2271                 return -ENOMEM;
2272         }
2273
2274         return 0;
2275 }
2276
2277 int is_tracing_stopped(void)
2278 {
2279         return global_trace.stop_count;
2280 }
2281
2282 /**
2283  * tracing_start - quick start of the tracer
2284  *
2285  * If tracing is enabled but was stopped by tracing_stop,
2286  * this will start the tracer back up.
2287  */
2288 void tracing_start(void)
2289 {
2290         struct trace_buffer *buffer;
2291         unsigned long flags;
2292
2293         if (tracing_disabled)
2294                 return;
2295
2296         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2297         if (--global_trace.stop_count) {
2298                 if (global_trace.stop_count < 0) {
2299                         /* Someone screwed up their debugging */
2300                         WARN_ON_ONCE(1);
2301                         global_trace.stop_count = 0;
2302                 }
2303                 goto out;
2304         }
2305
2306         /* Prevent the buffers from switching */
2307         arch_spin_lock(&global_trace.max_lock);
2308
2309         buffer = global_trace.array_buffer.buffer;
2310         if (buffer)
2311                 ring_buffer_record_enable(buffer);
2312
2313 #ifdef CONFIG_TRACER_MAX_TRACE
2314         buffer = global_trace.max_buffer.buffer;
2315         if (buffer)
2316                 ring_buffer_record_enable(buffer);
2317 #endif
2318
2319         arch_spin_unlock(&global_trace.max_lock);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2323 }
2324
2325 static void tracing_start_tr(struct trace_array *tr)
2326 {
2327         struct trace_buffer *buffer;
2328         unsigned long flags;
2329
2330         if (tracing_disabled)
2331                 return;
2332
2333         /* If global, we need to also start the max tracer */
2334         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2335                 return tracing_start();
2336
2337         raw_spin_lock_irqsave(&tr->start_lock, flags);
2338
2339         if (--tr->stop_count) {
2340                 if (tr->stop_count < 0) {
2341                         /* Someone screwed up their debugging */
2342                         WARN_ON_ONCE(1);
2343                         tr->stop_count = 0;
2344                 }
2345                 goto out;
2346         }
2347
2348         buffer = tr->array_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_enable(buffer);
2351
2352  out:
2353         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2354 }
2355
2356 /**
2357  * tracing_stop - quick stop of the tracer
2358  *
2359  * Lightweight way to stop tracing. Use in conjunction with
2360  * tracing_start.
2361  */
2362 void tracing_stop(void)
2363 {
2364         struct trace_buffer *buffer;
2365         unsigned long flags;
2366
2367         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2368         if (global_trace.stop_count++)
2369                 goto out;
2370
2371         /* Prevent the buffers from switching */
2372         arch_spin_lock(&global_trace.max_lock);
2373
2374         buffer = global_trace.array_buffer.buffer;
2375         if (buffer)
2376                 ring_buffer_record_disable(buffer);
2377
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379         buffer = global_trace.max_buffer.buffer;
2380         if (buffer)
2381                 ring_buffer_record_disable(buffer);
2382 #endif
2383
2384         arch_spin_unlock(&global_trace.max_lock);
2385
2386  out:
2387         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2388 }
2389
2390 static void tracing_stop_tr(struct trace_array *tr)
2391 {
2392         struct trace_buffer *buffer;
2393         unsigned long flags;
2394
2395         /* If global, we need to also stop the max tracer */
2396         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2397                 return tracing_stop();
2398
2399         raw_spin_lock_irqsave(&tr->start_lock, flags);
2400         if (tr->stop_count++)
2401                 goto out;
2402
2403         buffer = tr->array_buffer.buffer;
2404         if (buffer)
2405                 ring_buffer_record_disable(buffer);
2406
2407  out:
2408         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2409 }
2410
2411 static int trace_save_cmdline(struct task_struct *tsk)
2412 {
2413         unsigned tpid, idx;
2414
2415         /* treat recording of idle task as a success */
2416         if (!tsk->pid)
2417                 return 1;
2418
2419         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2420
2421         /*
2422          * It's not the end of the world if we don't get
2423          * the lock, but we also don't want to spin
2424          * nor do we want to disable interrupts,
2425          * so if we miss here, then better luck next time.
2426          *
2427          * This is called from within the scheduler and wakeup paths, so
2428          * interrupts had better be disabled and the run queue lock held.
2429          */
2430         lockdep_assert_preemption_disabled();
2431         if (!arch_spin_trylock(&trace_cmdline_lock))
2432                 return 0;
2433
2434         idx = savedcmd->map_pid_to_cmdline[tpid];
2435         if (idx == NO_CMDLINE_MAP) {
2436                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2437
2438                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2439                 savedcmd->cmdline_idx = idx;
2440         }
2441
2442         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443         set_cmdline(idx, tsk->comm);
2444
2445         arch_spin_unlock(&trace_cmdline_lock);
2446
2447         return 1;
2448 }
2449
2450 static void __trace_find_cmdline(int pid, char comm[])
2451 {
2452         unsigned map;
2453         int tpid;
2454
2455         if (!pid) {
2456                 strcpy(comm, "<idle>");
2457                 return;
2458         }
2459
2460         if (WARN_ON_ONCE(pid < 0)) {
2461                 strcpy(comm, "<XXX>");
2462                 return;
2463         }
2464
2465         tpid = pid & (PID_MAX_DEFAULT - 1);
2466         map = savedcmd->map_pid_to_cmdline[tpid];
2467         if (map != NO_CMDLINE_MAP) {
2468                 tpid = savedcmd->map_cmdline_to_pid[map];
2469                 if (tpid == pid) {
2470                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2471                         return;
2472                 }
2473         }
2474         strcpy(comm, "<...>");
2475 }
2476
2477 void trace_find_cmdline(int pid, char comm[])
2478 {
2479         preempt_disable();
2480         arch_spin_lock(&trace_cmdline_lock);
2481
2482         __trace_find_cmdline(pid, comm);
2483
2484         arch_spin_unlock(&trace_cmdline_lock);
2485         preempt_enable();
2486 }
2487
2488 static int *trace_find_tgid_ptr(int pid)
2489 {
2490         /*
2491          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492          * if we observe a non-NULL tgid_map then we also observe the correct
2493          * tgid_map_max.
2494          */
2495         int *map = smp_load_acquire(&tgid_map);
2496
2497         if (unlikely(!map || pid > tgid_map_max))
2498                 return NULL;
2499
2500         return &map[pid];
2501 }
2502
2503 int trace_find_tgid(int pid)
2504 {
2505         int *ptr = trace_find_tgid_ptr(pid);
2506
2507         return ptr ? *ptr : 0;
2508 }
2509
2510 static int trace_save_tgid(struct task_struct *tsk)
2511 {
2512         int *ptr;
2513
2514         /* treat recording of idle task as a success */
2515         if (!tsk->pid)
2516                 return 1;
2517
2518         ptr = trace_find_tgid_ptr(tsk->pid);
2519         if (!ptr)
2520                 return 0;
2521
2522         *ptr = tsk->tgid;
2523         return 1;
2524 }
2525
2526 static bool tracing_record_taskinfo_skip(int flags)
2527 {
2528         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2529                 return true;
2530         if (!__this_cpu_read(trace_taskinfo_save))
2531                 return true;
2532         return false;
2533 }
2534
2535 /**
2536  * tracing_record_taskinfo - record the task info of a task
2537  *
2538  * @task:  task to record
2539  * @flags: TRACE_RECORD_CMDLINE for recording comm
2540  *         TRACE_RECORD_TGID for recording tgid
2541  */
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2543 {
2544         bool done;
2545
2546         if (tracing_record_taskinfo_skip(flags))
2547                 return;
2548
2549         /*
2550          * Record as much task information as possible. If some fail, continue
2551          * to try to record the others.
2552          */
2553         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2555
2556         /* If recording any information failed, try again soon. */
2557         if (!done)
2558                 return;
2559
2560         __this_cpu_write(trace_taskinfo_save, false);
2561 }
2562
2563 /**
2564  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2565  *
2566  * @prev: previous task during sched_switch
2567  * @next: next task during sched_switch
2568  * @flags: TRACE_RECORD_CMDLINE for recording comm
2569  *         TRACE_RECORD_TGID for recording tgid
2570  */
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572                                           struct task_struct *next, int flags)
2573 {
2574         bool done;
2575
2576         if (tracing_record_taskinfo_skip(flags))
2577                 return;
2578
2579         /*
2580          * Record as much task information as possible. If some fail, continue
2581          * to try to record the others.
2582          */
2583         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2587
2588         /* If recording any information failed, try again soon. */
2589         if (!done)
2590                 return;
2591
2592         __this_cpu_write(trace_taskinfo_save, false);
2593 }
2594
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2597 {
2598         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2599 }
2600
2601 void tracing_record_tgid(struct task_struct *task)
2602 {
2603         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2604 }
2605
2606 /*
2607  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609  * simplifies those functions and keeps them in sync.
2610  */
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2612 {
2613         return trace_seq_has_overflowed(s) ?
2614                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2615 }
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
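
/*
 * Illustrative only: an event's output callback usually ends with
 *
 *	trace_seq_printf(&iter->seq, "%s: %lu\n", name, value);
 *	return trace_handle_return(&iter->seq);
 *
 * so an overflowed trace_seq is reported as TRACE_TYPE_PARTIAL_LINE.
 */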
2617
2618 static unsigned short migration_disable_value(void)
2619 {
2620 #if defined(CONFIG_SMP)
2621         return current->migration_disabled;
2622 #else
2623         return 0;
2624 #endif
2625 }
2626
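/*
 * Pack the context state into a single trace_ctx word: bits 0-3 hold the
 * preempt count (capped at 15), bits 4-7 the migration-disable depth
 * (capped at 15), and bits 16 and up the TRACE_FLAG_* bits built below.
 */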
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2628 {
2629         unsigned int trace_flags = irqs_status;
2630         unsigned int pc;
2631
2632         pc = preempt_count();
2633
2634         if (pc & NMI_MASK)
2635                 trace_flags |= TRACE_FLAG_NMI;
2636         if (pc & HARDIRQ_MASK)
2637                 trace_flags |= TRACE_FLAG_HARDIRQ;
2638         if (in_serving_softirq())
2639                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2640         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641                 trace_flags |= TRACE_FLAG_BH_OFF;
2642
2643         if (tif_need_resched())
2644                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645         if (test_preempt_need_resched())
2646                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2649 }
2650
2651 struct ring_buffer_event *
2652 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2653                           int type,
2654                           unsigned long len,
2655                           unsigned int trace_ctx)
2656 {
2657         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2658 }
2659
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2662 static int trace_buffered_event_ref;
2663
2664 /**
2665  * trace_buffered_event_enable - enable buffering events
2666  *
2667  * When events are being filtered, it is quicker to use a temporary
2668  * buffer to write the event data into if there's a likely chance
2669  * that it will not be committed. Discarding an event from the ring
2670  * buffer is not as fast as committing one, and costs much more than
2671  * the extra copy done when the filter does match.
2672  *
2673  * When an event is to be filtered, allocate per cpu buffers to
2674  * write the event data into, and if the event is filtered and discarded
2675  * it is simply dropped, otherwise, the entire data is to be committed
2676  * in one shot.
2677  */
2678 void trace_buffered_event_enable(void)
2679 {
2680         struct ring_buffer_event *event;
2681         struct page *page;
2682         int cpu;
2683
2684         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2685
2686         if (trace_buffered_event_ref++)
2687                 return;
2688
2689         for_each_tracing_cpu(cpu) {
2690                 page = alloc_pages_node(cpu_to_node(cpu),
2691                                         GFP_KERNEL | __GFP_NORETRY, 0);
2692                 if (!page)
2693                         goto failed;
2694
2695                 event = page_address(page);
2696                 memset(event, 0, sizeof(*event));
2697
2698                 per_cpu(trace_buffered_event, cpu) = event;
2699
2700                 preempt_disable();
2701                 if (cpu == smp_processor_id() &&
2702                     __this_cpu_read(trace_buffered_event) !=
2703                     per_cpu(trace_buffered_event, cpu))
2704                         WARN_ON_ONCE(1);
2705                 preempt_enable();
2706         }
2707
2708         return;
2709  failed:
2710         trace_buffered_event_disable();
2711 }
2712
2713 static void enable_trace_buffered_event(void *data)
2714 {
2715         /* Probably not needed, but do it anyway */
2716         smp_rmb();
2717         this_cpu_dec(trace_buffered_event_cnt);
2718 }
2719
2720 static void disable_trace_buffered_event(void *data)
2721 {
2722         this_cpu_inc(trace_buffered_event_cnt);
2723 }
2724
2725 /**
2726  * trace_buffered_event_disable - disable buffering events
2727  *
2728  * When a filter is removed, it is faster to not use the buffered
2729  * events, and to commit directly into the ring buffer. Free up
2730  * the temp buffers when there are no more users. This requires
2731  * special synchronization with current events.
2732  */
2733 void trace_buffered_event_disable(void)
2734 {
2735         int cpu;
2736
2737         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738
2739         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2740                 return;
2741
2742         if (--trace_buffered_event_ref)
2743                 return;
2744
2745         preempt_disable();
2746         /* For each CPU, set the buffer as used. */
2747         smp_call_function_many(tracing_buffer_mask,
2748                                disable_trace_buffered_event, NULL, 1);
2749         preempt_enable();
2750
2751         /* Wait for all current users to finish */
2752         synchronize_rcu();
2753
2754         for_each_tracing_cpu(cpu) {
2755                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756                 per_cpu(trace_buffered_event, cpu) = NULL;
2757         }
2758         /*
2759          * Make sure trace_buffered_event is NULL before clearing
2760          * trace_buffered_event_cnt.
2761          */
2762         smp_wmb();
2763
2764         preempt_disable();
2765         /* Do the work on each cpu */
2766         smp_call_function_many(tracing_buffer_mask,
2767                                enable_trace_buffered_event, NULL, 1);
2768         preempt_enable();
2769 }
2770
2771 static struct trace_buffer *temp_buffer;
2772
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775                           struct trace_event_file *trace_file,
2776                           int type, unsigned long len,
2777                           unsigned int trace_ctx)
2778 {
2779         struct ring_buffer_event *entry;
2780         struct trace_array *tr = trace_file->tr;
2781         int val;
2782
2783         *current_rb = tr->array_buffer.buffer;
2784
2785         if (!tr->no_filter_buffering_ref &&
2786             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787                 preempt_disable_notrace();
2788                 /*
2789                  * Filtering is on, so try to use the per cpu buffer first.
2790                  * This buffer will simulate a ring_buffer_event,
2791                  * where the type_len is zero and the array[0] will
2792                  * hold the full length.
2793                  * (see include/linux/ring_buffer.h for details on
2794                  *  how the ring_buffer_event is structured).
2795                  *
2796                  * Using a temp buffer during filtering and copying it
2797                  * on a matched filter is quicker than writing directly
2798                  * into the ring buffer and then discarding it when
2799                  * it doesn't match. That is because the discard
2800                  * requires several atomic operations to get right.
2801                  * Copying on match and doing nothing on a failed match
2802                  * is still quicker than no copy on match, but having
2803                  * to discard out of the ring buffer on a failed match.
2804                  */
2805                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2806                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2807
2808                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2809
2810                         /*
2811                          * Preemption is disabled, but interrupts and NMIs
2812                          * can still come in now. If that happens after
2813                          * the above increment, then it will have to go
2814                          * back to the old method of allocating the event
2815                          * on the ring buffer, and if the filter fails, it
2816                          * will have to call ring_buffer_discard_commit()
2817                          * to remove it.
2818                          *
2819                          * Need to also check the unlikely case that the
2820                          * length is bigger than the temp buffer size.
2821                          * If that happens, then the reserve is pretty much
2822                          * guaranteed to fail, as the ring buffer currently
2823                          * only allows events less than a page. But that may
2824                          * change in the future, so let the ring buffer reserve
2825                          * handle the failure in that case.
2826                          */
2827                         if (val == 1 && likely(len <= max_len)) {
2828                                 trace_event_setup(entry, type, trace_ctx);
2829                                 entry->array[0] = len;
2830                                 /* Return with preemption disabled */
2831                                 return entry;
2832                         }
2833                         this_cpu_dec(trace_buffered_event_cnt);
2834                 }
2835                 /* __trace_buffer_lock_reserve() disables preemption */
2836                 preempt_enable_notrace();
2837         }
2838
2839         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2840                                             trace_ctx);
2841         /*
2842          * If tracing is off, but we have triggers enabled,
2843          * we still need to look at the event data. Use the temp_buffer
2844          * to store the trace event for the trigger to use. It's recursion
2845          * safe and will not be recorded anywhere.
2846          */
2847         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848                 *current_rb = temp_buffer;
2849                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2850                                                     trace_ctx);
2851         }
2852         return entry;
2853 }
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2855
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2858
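/*
 * Used when tracepoint_printk is set: format the event with its registered
 * trace() callback into tracepoint_print_iter and echo it to printk().
 */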
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2860 {
2861         struct trace_event_call *event_call;
2862         struct trace_event_file *file;
2863         struct trace_event *event;
2864         unsigned long flags;
2865         struct trace_iterator *iter = tracepoint_print_iter;
2866
2867         /* We should never get here if iter is NULL */
2868         if (WARN_ON_ONCE(!iter))
2869                 return;
2870
2871         event_call = fbuffer->trace_file->event_call;
2872         if (!event_call || !event_call->event.funcs ||
2873             !event_call->event.funcs->trace)
2874                 return;
2875
2876         file = fbuffer->trace_file;
2877         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879              !filter_match_preds(file->filter, fbuffer->entry)))
2880                 return;
2881
2882         event = &fbuffer->trace_file->event_call->event;
2883
2884         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885         trace_seq_init(&iter->seq);
2886         iter->ent = fbuffer->entry;
2887         event_call->event.funcs->trace(iter, 0, event);
2888         trace_seq_putc(&iter->seq, 0);
2889         printk("%s", iter->seq.buffer);
2890
2891         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2892 }
2893
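/*
 * sysctl handler for tracepoint_printk: flip the static key so that the
 * event commit path starts (or stops) mirroring events to printk().
 */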
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895                              void *buffer, size_t *lenp,
2896                              loff_t *ppos)
2897 {
2898         int save_tracepoint_printk;
2899         int ret;
2900
2901         mutex_lock(&tracepoint_printk_mutex);
2902         save_tracepoint_printk = tracepoint_printk;
2903
2904         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2905
2906         /*
2907          * This will force exiting early, as tracepoint_printk
2908          * is always zero when tracepoint_print_iter is not allocated.
2909          */
2910         if (!tracepoint_print_iter)
2911                 tracepoint_printk = 0;
2912
2913         if (save_tracepoint_printk == tracepoint_printk)
2914                 goto out;
2915
2916         if (tracepoint_printk)
2917                 static_key_enable(&tracepoint_printk_key.key);
2918         else
2919                 static_key_disable(&tracepoint_printk_key.key);
2920
2921  out:
2922         mutex_unlock(&tracepoint_printk_mutex);
2923
2924         return ret;
2925 }
2926
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2928 {
2929         enum event_trigger_type tt = ETT_NONE;
2930         struct trace_event_file *file = fbuffer->trace_file;
2931
2932         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933                         fbuffer->entry, &tt))
2934                 goto discard;
2935
2936         if (static_key_false(&tracepoint_printk_key.key))
2937                 output_printk(fbuffer);
2938
2939         if (static_branch_unlikely(&trace_event_exports_enabled))
2940                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2941
2942         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2944
2945 discard:
2946         if (tt)
2947                 event_triggers_post_call(file, tt);
2948
2949 }
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2951
2952 /*
2953  * Skip 3:
2954  *
2955  *   trace_buffer_unlock_commit_regs()
2956  *   trace_event_buffer_commit()
2957  *   trace_event_raw_event_xxx()
2958  */
2959 # define STACK_SKIP 3
2960
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962                                      struct trace_buffer *buffer,
2963                                      struct ring_buffer_event *event,
2964                                      unsigned int trace_ctx,
2965                                      struct pt_regs *regs)
2966 {
2967         __buffer_unlock_commit(buffer, event);
2968
2969         /*
2970          * If regs is not set, then skip the necessary functions.
2971          * Note, we can still get here via blktrace, wakeup tracer
2972          * and mmiotrace, but that's ok if they lose a function or
2973          * two. They are not that meaningful.
2974          */
2975         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976         ftrace_trace_userstack(tr, buffer, trace_ctx);
2977 }
2978
2979 /*
2980  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2981  */
2982 void
2983 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2984                                    struct ring_buffer_event *event)
2985 {
2986         __buffer_unlock_commit(buffer, event);
2987 }
2988
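/*
 * Record a single function entry (ip and its caller parent_ip) into the
 * trace array's ring buffer, honoring event filters and function exports.
 */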
2989 void
2990 trace_function(struct trace_array *tr, unsigned long ip,
2991                unsigned long parent_ip, unsigned int trace_ctx)
2992 {
2993         struct trace_event_call *call = &event_function;
2994         struct trace_buffer *buffer = tr->array_buffer.buffer;
2995         struct ring_buffer_event *event;
2996         struct ftrace_entry *entry;
2997
2998         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2999                                             trace_ctx);
3000         if (!event)
3001                 return;
3002         entry   = ring_buffer_event_data(event);
3003         entry->ip                       = ip;
3004         entry->parent_ip                = parent_ip;
3005
3006         if (!call_filter_check_discard(call, entry, buffer, event)) {
3007                 if (static_branch_unlikely(&trace_function_exports_enabled))
3008                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009                 __buffer_unlock_commit(buffer, event);
3010         }
3011 }
3012
3013 #ifdef CONFIG_STACKTRACE
3014
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING   4
3017
3018 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3019
3020 struct ftrace_stack {
3021         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3022 };
3023
3024
3025 struct ftrace_stacks {
3026         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3027 };
3028
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3031
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033                                  unsigned int trace_ctx,
3034                                  int skip, struct pt_regs *regs)
3035 {
3036         struct trace_event_call *call = &event_kernel_stack;
3037         struct ring_buffer_event *event;
3038         unsigned int size, nr_entries;
3039         struct ftrace_stack *fstack;
3040         struct stack_entry *entry;
3041         int stackidx;
3042
3043         /*
3044          * Add one, for this function and the call to stack_trace_save().
3045          * If regs is set, then these functions will not be in the way.
3046          */
3047 #ifndef CONFIG_UNWINDER_ORC
3048         if (!regs)
3049                 skip++;
3050 #endif
3051
3052         preempt_disable_notrace();
3053
3054         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3055
3056         /* This should never happen. If it does, yell once and skip */
3057         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3058                 goto out;
3059
3060         /*
3061          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062          * interrupt will either see the value pre increment or post
3063          * increment. If the interrupt happens pre increment it will have
3064          * restored the counter when it returns.  We just need a barrier to
3065          * keep gcc from moving things around.
3066          */
3067         barrier();
3068
3069         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070         size = ARRAY_SIZE(fstack->calls);
3071
3072         if (regs) {
3073                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3074                                                    size, skip);
3075         } else {
3076                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3077         }
3078
3079         size = nr_entries * sizeof(unsigned long);
3080         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3082                                     trace_ctx);
3083         if (!event)
3084                 goto out;
3085         entry = ring_buffer_event_data(event);
3086
3087         memcpy(&entry->caller, fstack->calls, size);
3088         entry->size = nr_entries;
3089
3090         if (!call_filter_check_discard(call, entry, buffer, event))
3091                 __buffer_unlock_commit(buffer, event);
3092
3093  out:
3094         /* Again, don't let gcc optimize things here */
3095         barrier();
3096         __this_cpu_dec(ftrace_stack_reserve);
3097         preempt_enable_notrace();
3098
3099 }
3100
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102                                       struct trace_buffer *buffer,
3103                                       unsigned int trace_ctx,
3104                                       int skip, struct pt_regs *regs)
3105 {
3106         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3107                 return;
3108
3109         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3110 }
3111
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3113                    int skip)
3114 {
3115         struct trace_buffer *buffer = tr->array_buffer.buffer;
3116
3117         if (rcu_is_watching()) {
3118                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3119                 return;
3120         }
3121
3122         /*
3123          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124          * but if the above rcu_is_watching() failed, then the NMI
3125          * triggered someplace critical, and ct_irq_enter() should
3126          * not be called from NMI.
3127          */
3128         if (unlikely(in_nmi()))
3129                 return;
3130
3131         ct_irq_enter_irqson();
3132         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133         ct_irq_exit_irqson();
3134 }
3135
3136 /**
3137  * trace_dump_stack - record a stack back trace in the trace buffer
3138  * @skip: Number of functions to skip (helper handlers)
3139  */
3140 void trace_dump_stack(int skip)
3141 {
3142         if (tracing_disabled || tracing_selftest_running)
3143                 return;
3144
3145 #ifndef CONFIG_UNWINDER_ORC
3146         /* Skip 1 to skip this function. */
3147         skip++;
3148 #endif
3149         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3150                              tracing_gen_ctx(), skip, NULL);
3151 }
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
3153
3154 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3155 static DEFINE_PER_CPU(int, user_stack_count);
3156
3157 static void
3158 ftrace_trace_userstack(struct trace_array *tr,
3159                        struct trace_buffer *buffer, unsigned int trace_ctx)
3160 {
3161         struct trace_event_call *call = &event_user_stack;
3162         struct ring_buffer_event *event;
3163         struct userstack_entry *entry;
3164
3165         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3166                 return;
3167
3168         /*
3169          * NMIs cannot handle page faults, even with fixups.
3170          * Saving the user stack can (and often does) fault.
3171          */
3172         if (unlikely(in_nmi()))
3173                 return;
3174
3175         /*
3176          * prevent recursion, since the user stack tracing may
3177          * trigger other kernel events.
3178          */
3179         preempt_disable();
3180         if (__this_cpu_read(user_stack_count))
3181                 goto out;
3182
3183         __this_cpu_inc(user_stack_count);
3184
3185         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3186                                             sizeof(*entry), trace_ctx);
3187         if (!event)
3188                 goto out_drop_count;
3189         entry   = ring_buffer_event_data(event);
3190
3191         entry->tgid             = current->tgid;
3192         memset(&entry->caller, 0, sizeof(entry->caller));
3193
3194         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3195         if (!call_filter_check_discard(call, entry, buffer, event))
3196                 __buffer_unlock_commit(buffer, event);
3197
3198  out_drop_count:
3199         __this_cpu_dec(user_stack_count);
3200  out:
3201         preempt_enable();
3202 }
3203 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3204 static void ftrace_trace_userstack(struct trace_array *tr,
3205                                    struct trace_buffer *buffer,
3206                                    unsigned int trace_ctx)
3207 {
3208 }
3209 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3210
3211 #endif /* CONFIG_STACKTRACE */
3212
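/* Store the 64-bit delta as the two 32-bit halves kept in the entry. */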
3213 static inline void
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215                           unsigned long long delta)
3216 {
3217         entry->bottom_delta_ts = delta & U32_MAX;
3218         entry->top_delta_ts = (delta >> 32);
3219 }
3220
3221 void trace_last_func_repeats(struct trace_array *tr,
3222                              struct trace_func_repeats *last_info,
3223                              unsigned int trace_ctx)
3224 {
3225         struct trace_buffer *buffer = tr->array_buffer.buffer;
3226         struct func_repeats_entry *entry;
3227         struct ring_buffer_event *event;
3228         u64 delta;
3229
3230         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231                                             sizeof(*entry), trace_ctx);
3232         if (!event)
3233                 return;
3234
3235         delta = ring_buffer_event_time_stamp(buffer, event) -
3236                 last_info->ts_last_call;
3237
3238         entry = ring_buffer_event_data(event);
3239         entry->ip = last_info->ip;
3240         entry->parent_ip = last_info->parent_ip;
3241         entry->count = last_info->count;
3242         func_repeats_set_delta_ts(entry, delta);
3243
3244         __buffer_unlock_commit(buffer, event);
3245 }
3246
3247 /* created for use with alloc_percpu */
3248 struct trace_buffer_struct {
3249         int nesting;
3250         char buffer[4][TRACE_BUF_SIZE];
3251 };
3252
3253 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3254
3255 /*
3256  * This allows for lockless recording.  If we're nested too deeply, then
3257  * this returns NULL.
3258  */
3259 static char *get_trace_buf(void)
3260 {
3261         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3262
3263         if (!trace_percpu_buffer || buffer->nesting >= 4)
3264                 return NULL;
3265
3266         buffer->nesting++;
3267
3268         /* Interrupts must see nesting incremented before we use the buffer */
3269         barrier();
3270         return &buffer->buffer[buffer->nesting - 1][0];
3271 }
3272
3273 static void put_trace_buf(void)
3274 {
3275         /* Don't let the decrement of nesting leak before this */
3276         barrier();
3277         this_cpu_dec(trace_percpu_buffer->nesting);
3278 }
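
/*
 * Usage sketch (editorial addition): callers pair the two helpers above,
 * with preemption disabled so the per-CPU buffer stays valid, and fall
 * back gracefully when the nesting limit of four (presumably one level
 * each for process, softirq, irq and NMI context) is exceeded:
 *
 *        char *buf;
 *
 *        preempt_disable_notrace();
 *        buf = get_trace_buf();
 *        if (buf) {
 *                ... format at most TRACE_BUF_SIZE bytes into buf ...
 *                put_trace_buf();
 *        }
 *        preempt_enable_notrace();
 *
 * trace_vbprintk() and __trace_array_vprintk() below follow this pattern.
 */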
3279
3280 static int alloc_percpu_trace_buffer(void)
3281 {
3282         struct trace_buffer_struct __percpu *buffers;
3283
3284         if (trace_percpu_buffer)
3285                 return 0;
3286
3287         buffers = alloc_percpu(struct trace_buffer_struct);
3288         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3289                 return -ENOMEM;
3290
3291         trace_percpu_buffer = buffers;
3292         return 0;
3293 }
3294
3295 static int buffers_allocated;
3296
3297 void trace_printk_init_buffers(void)
3298 {
3299         if (buffers_allocated)
3300                 return;
3301
3302         if (alloc_percpu_trace_buffer())
3303                 return;
3304
3305         /* trace_printk() is for debug use only. Don't use it in production. */
3306
3307         pr_warn("\n");
3308         pr_warn("**********************************************************\n");
3309         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3310         pr_warn("**                                                      **\n");
3311         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3312         pr_warn("**                                                      **\n");
3313         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3314         pr_warn("** unsafe for production use.                           **\n");
3315         pr_warn("**                                                      **\n");
3316         pr_warn("** If you see this message and you are not debugging    **\n");
3317         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3318         pr_warn("**                                                      **\n");
3319         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3320         pr_warn("**********************************************************\n");
3321
3322         /* Expand the buffers to the set size */
3323         tracing_update_buffers();
3324
3325         buffers_allocated = 1;
3326
3327         /*
3328          * trace_printk_init_buffers() can be called by modules.
3329          * If that happens, then we need to start cmdline recording
3330          * directly here. If global_trace.array_buffer.buffer is already
3331          * allocated here, then this was called by module code.
3332          */
3333         if (global_trace.array_buffer.buffer)
3334                 tracing_start_cmdline_record();
3335 }
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3337
3338 void trace_printk_start_comm(void)
3339 {
3340         /* Start tracing comms if trace printk is set */
3341         if (!buffers_allocated)
3342                 return;
3343         tracing_start_cmdline_record();
3344 }
3345
3346 static void trace_printk_start_stop_comm(int enabled)
3347 {
3348         if (!buffers_allocated)
3349                 return;
3350
3351         if (enabled)
3352                 tracing_start_cmdline_record();
3353         else
3354                 tracing_stop_cmdline_record();
3355 }
3356
3357 /**
3358  * trace_vbprintk - write binary msg to tracing buffer
3359  * @ip:    The address of the caller
3360  * @fmt:   The string format to write to the buffer
3361  * @args:  Arguments for @fmt
3362  */
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3364 {
3365         struct trace_event_call *call = &event_bprint;
3366         struct ring_buffer_event *event;
3367         struct trace_buffer *buffer;
3368         struct trace_array *tr = &global_trace;
3369         struct bprint_entry *entry;
3370         unsigned int trace_ctx;
3371         char *tbuffer;
3372         int len = 0, size;
3373
3374         if (unlikely(tracing_selftest_running || tracing_disabled))
3375                 return 0;
3376
3377         /* Don't pollute graph traces with trace_vprintk internals */
3378         pause_graph_tracing();
3379
3380         trace_ctx = tracing_gen_ctx();
3381         preempt_disable_notrace();
3382
3383         tbuffer = get_trace_buf();
3384         if (!tbuffer) {
3385                 len = 0;
3386                 goto out_nobuffer;
3387         }
3388
3389         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3390
3391         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3392                 goto out_put;
3393
3394         size = sizeof(*entry) + sizeof(u32) * len;
3395         buffer = tr->array_buffer.buffer;
3396         ring_buffer_nest_start(buffer);
3397         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3398                                             trace_ctx);
3399         if (!event)
3400                 goto out;
3401         entry = ring_buffer_event_data(event);
3402         entry->ip                       = ip;
3403         entry->fmt                      = fmt;
3404
3405         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406         if (!call_filter_check_discard(call, entry, buffer, event)) {
3407                 __buffer_unlock_commit(buffer, event);
3408                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3409         }
3410
3411 out:
3412         ring_buffer_nest_end(buffer);
3413 out_put:
3414         put_trace_buf();
3415
3416 out_nobuffer:
3417         preempt_enable_notrace();
3418         unpause_graph_tracing();
3419
3420         return len;
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
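
/*
 * Editorial example (illustrative): a varargs wrapper, here called
 * example_bprintk() purely for illustration, would feed trace_vbprintk()
 * like this (cf. __trace_bprintk() in kernel/trace/trace_printk.c):
 *
 *        static __printf(2, 3)
 *        int example_bprintk(unsigned long ip, const char *fmt, ...)
 *        {
 *                va_list ap;
 *                int ret;
 *
 *                va_start(ap, fmt);
 *                ret = trace_vbprintk(ip, fmt, ap);
 *                va_end(ap);
 *                return ret;
 *        }
 */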
3423
3424 __printf(3, 0)
3425 static int
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427                       unsigned long ip, const char *fmt, va_list args)
3428 {
3429         struct trace_event_call *call = &event_print;
3430         struct ring_buffer_event *event;
3431         int len = 0, size;
3432         struct print_entry *entry;
3433         unsigned int trace_ctx;
3434         char *tbuffer;
3435
3436         if (tracing_disabled || tracing_selftest_running)
3437                 return 0;
3438
3439         /* Don't pollute graph traces with trace_vprintk internals */
3440         pause_graph_tracing();
3441
3442         trace_ctx = tracing_gen_ctx();
3443         preempt_disable_notrace();
3444
3445
3446         tbuffer = get_trace_buf();
3447         if (!tbuffer) {
3448                 len = 0;
3449                 goto out_nobuffer;
3450         }
3451
3452         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3453
3454         size = sizeof(*entry) + len + 1;
3455         ring_buffer_nest_start(buffer);
3456         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3457                                             trace_ctx);
3458         if (!event)
3459                 goto out;
3460         entry = ring_buffer_event_data(event);
3461         entry->ip = ip;
3462
3463         memcpy(&entry->buf, tbuffer, len + 1);
3464         if (!call_filter_check_discard(call, entry, buffer, event)) {
3465                 __buffer_unlock_commit(buffer, event);
3466                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3467         }
3468
3469 out:
3470         ring_buffer_nest_end(buffer);
3471         put_trace_buf();
3472
3473 out_nobuffer:
3474         preempt_enable_notrace();
3475         unpause_graph_tracing();
3476
3477         return len;
3478 }
3479
3480 __printf(3, 0)
3481 int trace_array_vprintk(struct trace_array *tr,
3482                         unsigned long ip, const char *fmt, va_list args)
3483 {
3484         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3485 }
3486
3487 /**
3488  * trace_array_printk - Print a message to a specific instance
3489  * @tr: The instance trace_array descriptor
3490  * @ip: The instruction pointer that this is called from.
3491  * @fmt: The format to print (printf format)
3492  *
3493  * If a subsystem sets up its own instance, it has the right to
3494  * printk strings into its tracing instance buffer using this
3495  * function. Note, this function will not write into the top level
3496  * buffer (use trace_printk() for that), as the top level buffer
3497  * should only contain events that can be individually disabled.
3498  * trace_printk() is only used for debugging a kernel, and should never
3499  * be incorporated into normal use.
3500  *
3501  * trace_array_printk() can be used instead, as it does not add noise
3502  * to the top level tracing buffer.
3503  *
3504  * Note, trace_array_init_printk() must be called on @tr before this
3505  * can be used.
3506  */
3507 __printf(3, 0)
3508 int trace_array_printk(struct trace_array *tr,
3509                        unsigned long ip, const char *fmt, ...)
3510 {
3511         int ret;
3512         va_list ap;
3513
3514         if (!tr)
3515                 return -ENOENT;
3516
3517         /* This is only allowed for created instances */
3518         if (tr == &global_trace)
3519                 return 0;
3520
3521         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3522                 return 0;
3523
3524         va_start(ap, fmt);
3525         ret = trace_array_vprintk(tr, ip, fmt, ap);
3526         va_end(ap);
3527         return ret;
3528 }
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3530
3531 /**
3532  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533  * @tr: The trace array to initialize the buffers for
3534  *
3535  * As trace_array_printk() only writes into instances, calls to it are
3536  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3537  * before trace_array_printk() can be used on a trace_array.
3538  */
3539 int trace_array_init_printk(struct trace_array *tr)
3540 {
3541         if (!tr)
3542                 return -ENOENT;
3543
3544         /* This is only allowed for created instances */
3545         if (tr == &global_trace)
3546                 return -EINVAL;
3547
3548         return alloc_percpu_trace_buffer();
3549 }
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
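
/*
 * Editorial example (illustrative): a subsystem with its own instance,
 * here named "myinst" purely for illustration, would combine the two
 * calls above roughly as follows (error handling elided;
 * trace_array_get_by_name() and trace_array_put() are declared in
 * include/linux/trace.h):
 *
 *        struct trace_array *tr;
 *
 *        tr = trace_array_get_by_name("myinst");
 *        if (!tr)
 *                return;
 *        if (!trace_array_init_printk(tr))
 *                trace_array_printk(tr, _THIS_IP_, "hello %s\n", "world");
 *        trace_array_put(tr);
 */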
3551
3552 __printf(3, 4)
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554                            unsigned long ip, const char *fmt, ...)
3555 {
3556         int ret;
3557         va_list ap;
3558
3559         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3560                 return 0;
3561
3562         va_start(ap, fmt);
3563         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3564         va_end(ap);
3565         return ret;
3566 }
3567
3568 __printf(2, 0)
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3570 {
3571         return trace_array_vprintk(&global_trace, ip, fmt, args);
3572 }
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3574
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3576 {
3577         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3578
3579         iter->idx++;
3580         if (buf_iter)
3581                 ring_buffer_iter_advance(buf_iter);
3582 }
3583
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586                 unsigned long *lost_events)
3587 {
3588         struct ring_buffer_event *event;
3589         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3590
3591         if (buf_iter) {
3592                 event = ring_buffer_iter_peek(buf_iter, ts);
3593                 if (lost_events)
3594                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595                                 (unsigned long)-1 : 0;
3596         } else {
3597                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3598                                          lost_events);
3599         }
3600
3601         if (event) {
3602                 iter->ent_size = ring_buffer_event_length(event);
3603                 return ring_buffer_event_data(event);
3604         }
3605         iter->ent_size = 0;
3606         return NULL;
3607 }
3608
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611                   unsigned long *missing_events, u64 *ent_ts)
3612 {
3613         struct trace_buffer *buffer = iter->array_buffer->buffer;
3614         struct trace_entry *ent, *next = NULL;
3615         unsigned long lost_events = 0, next_lost = 0;
3616         int cpu_file = iter->cpu_file;
3617         u64 next_ts = 0, ts;
3618         int next_cpu = -1;
3619         int next_size = 0;
3620         int cpu;
3621
3622         /*
3623          * If we are in a per_cpu trace file, don't bother iterating over
3624          * all CPUs; just peek at that CPU directly.
3625          */
3626         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3628                         return NULL;
3629                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3630                 if (ent_cpu)
3631                         *ent_cpu = cpu_file;
3632
3633                 return ent;
3634         }
3635
3636         for_each_tracing_cpu(cpu) {
3637
3638                 if (ring_buffer_empty_cpu(buffer, cpu))
3639                         continue;
3640
3641                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3642
3643                 /*
3644                  * Pick the entry with the smallest timestamp:
3645                  */
3646                 if (ent && (!next || ts < next_ts)) {
3647                         next = ent;
3648                         next_cpu = cpu;
3649                         next_ts = ts;
3650                         next_lost = lost_events;
3651                         next_size = iter->ent_size;
3652                 }
3653         }
3654
3655         iter->ent_size = next_size;
3656
3657         if (ent_cpu)
3658                 *ent_cpu = next_cpu;
3659
3660         if (ent_ts)
3661                 *ent_ts = next_ts;
3662
3663         if (missing_events)
3664                 *missing_events = next_lost;
3665
3666         return next;
3667 }
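
/*
 * Worked example (editorial addition): if CPU0's next entry has a
 * timestamp of 105 and CPU1's has 100, the loop above returns CPU1's
 * entry (the smallest timestamp) and reports next_cpu == 1, leaving
 * CPU0's entry to be picked up by a later call; repeated calls thereby
 * merge the per-CPU buffers into a single time-ordered stream.
 */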
3668
3669 #define STATIC_FMT_BUF_SIZE     128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3671
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3673 {
3674         char *tmp;
3675
3676         /*
3677          * iter->tr is NULL when used with tp_printk, which means
3678          * this can get called where it is not safe to call krealloc().
3679          */
3680         if (!iter->tr || iter->fmt == static_fmt_buf)
3681                 return NULL;
3682
3683         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3684                        GFP_KERNEL);
3685         if (tmp) {
3686                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3687                 iter->fmt = tmp;
3688         }
3689
3690         return tmp;
3691 }
3692
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3695                            bool star, int len)
3696 {
3697         unsigned long addr = (unsigned long)str;
3698         struct trace_event *trace_event;
3699         struct trace_event_call *event;
3700
3701         /* Ignore strings with no length */
3702         if (star && !len)
3703                 return true;
3704
3705         /* OK if part of the event data */
3706         if ((addr >= (unsigned long)iter->ent) &&
3707             (addr < (unsigned long)iter->ent + iter->ent_size))
3708                 return true;
3709
3710         /* OK if part of the temp seq buffer */
3711         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3713                 return true;
3714
3715         /* Core rodata can not be freed */
3716         if (is_kernel_rodata(addr))
3717                 return true;
3718
3719         if (trace_is_tracepoint_string(str))
3720                 return true;
3721
3722         /*
3723          * Now this could be a module event, referencing core module
3724          * data, which is OK.
3725          */
3726         if (!iter->ent)
3727                 return false;
3728
3729         trace_event = ftrace_find_event(iter->ent->type);
3730         if (!trace_event)
3731                 return false;
3732
3733         event = container_of(trace_event, struct trace_event_call, event);
3734         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3735                 return false;
3736
3737         /* Would rather have rodata, but this will suffice */
3738         if (within_module_core(addr, event->module))
3739                 return true;
3740
3741         return false;
3742 }
3743
3744 static const char *show_buffer(struct trace_seq *s)
3745 {
3746         struct seq_buf *seq = &s->seq;
3747
3748         seq_buf_terminate(seq);
3749
3750         return seq->buffer;
3751 }
3752
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3754
3755 static int test_can_verify_check(const char *fmt, ...)
3756 {
3757         char buf[16];
3758         va_list ap;
3759         int ret;
3760
3761         /*
3762          * The verifier depends on vsnprintf() modifying the va_list
3763          * passed to it, i.e. on the va_list being passed by reference.
3764          * Some architectures (like x86_32) pass it by value, which means
3765          * that vsnprintf() does not modify the caller's va_list, and the
3766          * verifier would then need to understand all the values that
3767          * vsnprintf() can consume. If the va_list is passed by value, the
3768          * verifier is simply disabled.
3769          */
3770         va_start(ap, fmt);
3771         vsnprintf(buf, 16, "%d", ap);
3772         ret = va_arg(ap, int);
3773         va_end(ap);
3774
3775         return ret;
3776 }
3777
3778 static void test_can_verify(void)
3779 {
3780         if (!test_can_verify_check("%d %d", 0, 1)) {
3781                 pr_info("trace event string verifier disabled\n");
3782                 static_branch_inc(&trace_no_verify);
3783         }
3784 }
3785
3786 /**
3787  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3788  * @iter: The iterator that holds the seq buffer and the event being printed
3789  * @fmt: The format used to print the event
3790  * @ap: The va_list holding the data to print from @fmt.
3791  *
3792  * This writes the data into the @iter->seq buffer using the data from
3793  * @fmt and @ap. If the format has a %s, then the source of the string
3794  * is examined to make sure it is safe to print, otherwise it will
3795  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3796  * pointer.
3797  */
3798 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3799                          va_list ap)
3800 {
3801         const char *p = fmt;
3802         const char *str;
3803         int i, j;
3804
3805         if (WARN_ON_ONCE(!fmt))
3806                 return;
3807
3808         if (static_branch_unlikely(&trace_no_verify))
3809                 goto print;
3810
3811         /* Don't bother checking when doing a ftrace_dump() */
3812         if (iter->fmt == static_fmt_buf)
3813                 goto print;
3814
3815         while (*p) {
3816                 bool star = false;
3817                 int len = 0;
3818
3819                 j = 0;
3820
3821                 /* We only care about %s and variants */
3822                 for (i = 0; p[i]; i++) {
3823                         if (i + 1 >= iter->fmt_size) {
3824                                 /*
3825                                  * If we can't expand the copy buffer,
3826                                  * just print it.
3827                                  */
3828                                 if (!trace_iter_expand_format(iter))
3829                                         goto print;
3830                         }
3831
3832                         if (p[i] == '\\' && p[i+1]) {
3833                                 i++;
3834                                 continue;
3835                         }
3836                         if (p[i] == '%') {
3837                                 /* Need to test cases like %08.*s */
3838                                 for (j = 1; p[i+j]; j++) {
3839                                         if (isdigit(p[i+j]) ||
3840                                             p[i+j] == '.')
3841                                                 continue;
3842                                         if (p[i+j] == '*') {
3843                                                 star = true;
3844                                                 continue;
3845                                         }
3846                                         break;
3847                                 }
3848                                 if (p[i+j] == 's')
3849                                         break;
3850                                 star = false;
3851                         }
3852                         j = 0;
3853                 }
3854                 /* If no %s found then just print normally */
3855                 if (!p[i])
3856                         break;
3857
3858                 /* Copy up to the %s, and print that */
3859                 strncpy(iter->fmt, p, i);
3860                 iter->fmt[i] = '\0';
3861                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3862
3863                 /*
3864                  * If iter->seq is full, the above call no longer guarantees
3865                  * that ap is in sync with fmt processing, and further calls
3866                  * to va_arg() can return wrong positional arguments.
3867                  *
3868                  * Ensure that ap is no longer used in this case.
3869                  */
3870                 if (iter->seq.full) {
3871                         p = "";
3872                         break;
3873                 }
3874
3875                 if (star)
3876                         len = va_arg(ap, int);
3877
3878                 /* The ap now points to the string data of the %s */
3879                 str = va_arg(ap, const char *);
3880
3881                 /*
3882                  * If you hit this warning, it is likely that the
3883                  * trace event in question used %s on a string that
3884                  * was saved at the time of the event, but may not be
3885                  * around when the trace is read. Use __string(),
3886                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3887                  * instead. See samples/trace_events/trace-events-sample.h
3888                  * for reference.
3889                  */
3890                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3891                               "fmt: '%s' current_buffer: '%s'",
3892                               fmt, show_buffer(&iter->seq))) {
3893                         int ret;
3894
3895                         /* Try to safely read the string */
3896                         if (star) {
3897                                 if (len + 1 > iter->fmt_size)
3898                                         len = iter->fmt_size - 1;
3899                                 if (len < 0)
3900                                         len = 0;
3901                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3902                                 iter->fmt[len] = 0;
3903                                 star = false;
3904                         } else {
3905                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3906                                                                   iter->fmt_size);
3907                         }
3908                         if (ret < 0)
3909                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3910                         else
3911                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3912                                                  str, iter->fmt);
3913                         str = "[UNSAFE-MEMORY]";
3914                         strcpy(iter->fmt, "%s");
3915                 } else {
3916                         strncpy(iter->fmt, p + i, j + 1);
3917                         iter->fmt[j+1] = '\0';
3918                 }
3919                 if (star)
3920                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3921                 else
3922                         trace_seq_printf(&iter->seq, iter->fmt, str);
3923
3924                 p += i + j + 1;
3925         }
3926  print:
3927         if (*p)
3928                 trace_seq_vprintf(&iter->seq, p, ap);
3929 }
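
/*
 * Editorial example (illustrative): the safe way to record a string that
 * may be gone by the time the trace is read, as the warning above
 * explains, is to copy it into the event itself with the __string(),
 * __assign_str() and __get_str() helpers, e.g. in a hypothetical trace
 * header:
 *
 *        TRACE_EVENT(example_event,
 *                TP_PROTO(const char *name),
 *                TP_ARGS(name),
 *                TP_STRUCT__entry(
 *                        __string(name, name)
 *                ),
 *                TP_fast_assign(
 *                        __assign_str(name, name);
 *                ),
 *                TP_printk("name=%s", __get_str(name))
 *        );
 *
 * See samples/trace_events/trace-events-sample.h for the full pattern.
 */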
3930
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3932 {
3933         const char *p, *new_fmt;
3934         char *q;
3935
3936         if (WARN_ON_ONCE(!fmt))
3937                 return fmt;
3938
3939         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3940                 return fmt;
3941
3942         p = fmt;
3943         new_fmt = q = iter->fmt;
3944         while (*p) {
3945                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946                         if (!trace_iter_expand_format(iter))
3947                                 return fmt;
3948
3949                         q += iter->fmt - new_fmt;
3950                         new_fmt = iter->fmt;
3951                 }
3952
3953                 *q++ = *p++;
3954
3955                 /* Replace %p with %px */
3956                 if (p[-1] == '%') {
3957                         if (p[0] == '%') {
3958                                 *q++ = *p++;
3959                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3960                                 *q++ = *p++;
3961                                 *q++ = 'x';
3962                         }
3963                 }
3964         }
3965         *q = '\0';
3966
3967         return new_fmt;
3968 }
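
/*
 * Worked example (editorial addition): with TRACE_ITER_HASH_PTR cleared,
 * a format such as
 *
 *        "comm=%s ptr=%p sym=%pS pct=%%p"
 *
 * is rewritten into iter->fmt as
 *
 *        "comm=%s ptr=%px sym=%pS pct=%%p"
 *
 * i.e. only a bare %p gets the 'x' appended; %%p and %p followed by an
 * alphanumeric extension (like %pS) are left alone.
 */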
3969
3970 #define STATIC_TEMP_BUF_SIZE    128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3972
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975                                           int *ent_cpu, u64 *ent_ts)
3976 {
3977         /* __find_next_entry will reset ent_size */
3978         int ent_size = iter->ent_size;
3979         struct trace_entry *entry;
3980
3981         /*
3982          * If called from ftrace_dump(), then the iter->temp buffer
3983          * will be the static_temp_buf and not created from kmalloc.
3984          * If the entry size is greater than the buffer, we cannot
3985          * save it. Just return NULL in that case. This is only
3986          * used to add markers when two consecutive events' time
3987          * stamps have a large delta. See trace_print_lat_context().
3988          */
3989         if (iter->temp == static_temp_buf &&
3990             STATIC_TEMP_BUF_SIZE < ent_size)
3991                 return NULL;
3992
3993         /*
3994          * __find_next_entry() may call peek_next_entry(), which may
3995          * call ring_buffer_peek(), which can make the contents of iter->ent
3996          * undefined. Need to copy iter->ent now.
3997          */
3998         if (iter->ent && iter->ent != iter->temp) {
3999                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4001                         void *temp;
4002                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4003                         if (!temp)
4004                                 return NULL;
4005                         kfree(iter->temp);
4006                         iter->temp = temp;
4007                         iter->temp_size = iter->ent_size;
4008                 }
4009                 memcpy(iter->temp, iter->ent, iter->ent_size);
4010                 iter->ent = iter->temp;
4011         }
4012         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013         /* Put back the original ent_size */
4014         iter->ent_size = ent_size;
4015
4016         return entry;
4017 }
4018
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4021 {
4022         iter->ent = __find_next_entry(iter, &iter->cpu,
4023                                       &iter->lost_events, &iter->ts);
4024
4025         if (iter->ent)
4026                 trace_iterator_increment(iter);
4027
4028         return iter->ent ? iter : NULL;
4029 }
4030
4031 static void trace_consume(struct trace_iterator *iter)
4032 {
4033         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034                             &iter->lost_events);
4035 }
4036
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4038 {
4039         struct trace_iterator *iter = m->private;
4040         int i = (int)*pos;
4041         void *ent;
4042
4043         WARN_ON_ONCE(iter->leftover);
4044
4045         (*pos)++;
4046
4047         /* can't go backwards */
4048         if (iter->idx > i)
4049                 return NULL;
4050
4051         if (iter->idx < 0)
4052                 ent = trace_find_next_entry_inc(iter);
4053         else
4054                 ent = iter;
4055
4056         while (ent && iter->idx < i)
4057                 ent = trace_find_next_entry_inc(iter);
4058
4059         iter->pos = *pos;
4060
4061         return ent;
4062 }
4063
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4065 {
4066         struct ring_buffer_iter *buf_iter;
4067         unsigned long entries = 0;
4068         u64 ts;
4069
4070         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4071
4072         buf_iter = trace_buffer_iter(iter, cpu);
4073         if (!buf_iter)
4074                 return;
4075
4076         ring_buffer_iter_reset(buf_iter);
4077
4078         /*
4079          * With the max latency tracers, we could have the case
4080          * that a reset never took place on a CPU. This is evident
4081          * from the timestamp being before the start of the buffer.
4082          */
4083         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084                 if (ts >= iter->array_buffer->time_start)
4085                         break;
4086                 entries++;
4087                 ring_buffer_iter_advance(buf_iter);
4088         }
4089
4090         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4091 }
4092
4093 /*
4094  * The current tracer is copied to avoid holding a global lock
4095  * around the whole read.
4096  */
4097 static void *s_start(struct seq_file *m, loff_t *pos)
4098 {
4099         struct trace_iterator *iter = m->private;
4100         struct trace_array *tr = iter->tr;
4101         int cpu_file = iter->cpu_file;
4102         void *p = NULL;
4103         loff_t l = 0;
4104         int cpu;
4105
4106         /*
4107          * Copy the tracer to avoid using a global lock all around.
4108          * iter->trace is a copy of current_trace, so the pointer to the
4109          * name may be compared instead of using strcmp(), as iter->trace->name
4110          * will point to the same string as current_trace->name.
4111          */
4112         mutex_lock(&trace_types_lock);
4113         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4114                 *iter->trace = *tr->current_trace;
4115         mutex_unlock(&trace_types_lock);
4116
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118         if (iter->snapshot && iter->trace->use_max_tr)
4119                 return ERR_PTR(-EBUSY);
4120 #endif
4121
4122         if (*pos != iter->pos) {
4123                 iter->ent = NULL;
4124                 iter->cpu = 0;
4125                 iter->idx = -1;
4126
4127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4128                         for_each_tracing_cpu(cpu)
4129                                 tracing_iter_reset(iter, cpu);
4130                 } else
4131                         tracing_iter_reset(iter, cpu_file);
4132
4133                 iter->leftover = 0;
4134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4135                         ;
4136
4137         } else {
4138                 /*
4139                  * If we overflowed the seq_file before, then we want
4140                  * to just reuse the trace_seq buffer again.
4141                  */
4142                 if (iter->leftover)
4143                         p = iter;
4144                 else {
4145                         l = *pos - 1;
4146                         p = s_next(m, p, &l);
4147                 }
4148         }
4149
4150         trace_event_read_lock();
4151         trace_access_lock(cpu_file);
4152         return p;
4153 }
4154
4155 static void s_stop(struct seq_file *m, void *p)
4156 {
4157         struct trace_iterator *iter = m->private;
4158
4159 #ifdef CONFIG_TRACER_MAX_TRACE
4160         if (iter->snapshot && iter->trace->use_max_tr)
4161                 return;
4162 #endif
4163
4164         trace_access_unlock(iter->cpu_file);
4165         trace_event_read_unlock();
4166 }
4167
4168 static void
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170                       unsigned long *entries, int cpu)
4171 {
4172         unsigned long count;
4173
4174         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4175         /*
4176          * If this buffer has skipped entries, then we hold all
4177          * entries for the trace and we need to ignore the
4178          * ones before the buffer's start time stamp.
4179          */
4180         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182                 /* total is the same as the entries */
4183                 *total = count;
4184         } else
4185                 *total = count +
4186                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4187         *entries = count;
4188 }
4189
4190 static void
4191 get_total_entries(struct array_buffer *buf,
4192                   unsigned long *total, unsigned long *entries)
4193 {
4194         unsigned long t, e;
4195         int cpu;
4196
4197         *total = 0;
4198         *entries = 0;
4199
4200         for_each_tracing_cpu(cpu) {
4201                 get_total_entries_cpu(buf, &t, &e, cpu);
4202                 *total += t;
4203                 *entries += e;
4204         }
4205 }
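
/*
 * Worked example (editorial addition): if a CPU buffer currently holds
 * 700 events and 300 older events were overwritten (the overrun count),
 * and no entries were skipped, then *entries == 700 and *total == 1000,
 * which print_event_info() below reports as
 * "entries-in-buffer/entries-written: 700/1000".
 */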
4206
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4208 {
4209         unsigned long total, entries;
4210
4211         if (!tr)
4212                 tr = &global_trace;
4213
4214         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4215
4216         return entries;
4217 }
4218
4219 unsigned long trace_total_entries(struct trace_array *tr)
4220 {
4221         unsigned long total, entries;
4222
4223         if (!tr)
4224                 tr = &global_trace;
4225
4226         get_total_entries(&tr->array_buffer, &total, &entries);
4227
4228         return entries;
4229 }
4230
4231 static void print_lat_help_header(struct seq_file *m)
4232 {
4233         seq_puts(m, "#                    _------=> CPU#            \n"
4234                     "#                   / _-----=> irqs-off/BH-disabled\n"
4235                     "#                  | / _----=> need-resched    \n"
4236                     "#                  || / _---=> hardirq/softirq \n"
4237                     "#                  ||| / _--=> preempt-depth   \n"
4238                     "#                  |||| / _-=> migrate-disable \n"
4239                     "#                  ||||| /     delay           \n"
4240                     "#  cmd     pid     |||||| time  |   caller     \n"
4241                     "#     \\   /        ||||||  \\    |    /       \n");
4242 }
4243
4244 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4245 {
4246         unsigned long total;
4247         unsigned long entries;
4248
4249         get_total_entries(buf, &total, &entries);
4250         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4251                    entries, total, num_online_cpus());
4252         seq_puts(m, "#\n");
4253 }
4254
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4256                                    unsigned int flags)
4257 {
4258         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4259
4260         print_event_info(buf, m);
4261
4262         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4263         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4264 }
4265
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4267                                        unsigned int flags)
4268 {
4269         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270         static const char space[] = "            ";
4271         int prec = tgid ? 12 : 2;
4272
4273         print_event_info(buf, m);
4274
4275         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4276         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4277         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4278         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4279         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4280         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4281         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4282         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4283 }
4284
4285 void
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4287 {
4288         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289         struct array_buffer *buf = iter->array_buffer;
4290         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291         struct tracer *type = iter->trace;
4292         unsigned long entries;
4293         unsigned long total;
4294         const char *name = type->name;
4295
4296         get_total_entries(buf, &total, &entries);
4297
4298         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4299                    name, UTS_RELEASE);
4300         seq_puts(m, "# -----------------------------------"
4301                  "---------------------------------\n");
4302         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304                    nsecs_to_usecs(data->saved_latency),
4305                    entries,
4306                    total,
4307                    buf->cpu,
4308                    preempt_model_none()      ? "server" :
4309                    preempt_model_voluntary() ? "desktop" :
4310                    preempt_model_full()      ? "preempt" :
4311                    preempt_model_rt()        ? "preempt_rt" :
4312                    "unknown",
4313                    /* These are reserved for later use */
4314                    0, 0, 0, 0);
4315 #ifdef CONFIG_SMP
4316         seq_printf(m, " #P:%d)\n", num_online_cpus());
4317 #else
4318         seq_puts(m, ")\n");
4319 #endif
4320         seq_puts(m, "#    -----------------\n");
4321         seq_printf(m, "#    | task: %.16s-%d "
4322                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323                    data->comm, data->pid,
4324                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325                    data->policy, data->rt_priority);
4326         seq_puts(m, "#    -----------------\n");
4327
4328         if (data->critical_start) {
4329                 seq_puts(m, "#  => started at: ");
4330                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331                 trace_print_seq(m, &iter->seq);
4332                 seq_puts(m, "\n#  => ended at:   ");
4333                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334                 trace_print_seq(m, &iter->seq);
4335                 seq_puts(m, "\n#\n");
4336         }
4337
4338         seq_puts(m, "#\n");
4339 }
4340
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4342 {
4343         struct trace_seq *s = &iter->seq;
4344         struct trace_array *tr = iter->tr;
4345
4346         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4347                 return;
4348
4349         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4350                 return;
4351
4352         if (cpumask_available(iter->started) &&
4353             cpumask_test_cpu(iter->cpu, iter->started))
4354                 return;
4355
4356         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4357                 return;
4358
4359         if (cpumask_available(iter->started))
4360                 cpumask_set_cpu(iter->cpu, iter->started);
4361
4362         /* Don't print started cpu buffer for the first entry of the trace */
4363         if (iter->idx > 1)
4364                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4365                                 iter->cpu);
4366 }
4367
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4369 {
4370         struct trace_array *tr = iter->tr;
4371         struct trace_seq *s = &iter->seq;
4372         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373         struct trace_entry *entry;
4374         struct trace_event *event;
4375
4376         entry = iter->ent;
4377
4378         test_cpu_buff_start(iter);
4379
4380         event = ftrace_find_event(entry->type);
4381
4382         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384                         trace_print_lat_context(iter);
4385                 else
4386                         trace_print_context(iter);
4387         }
4388
4389         if (trace_seq_has_overflowed(s))
4390                 return TRACE_TYPE_PARTIAL_LINE;
4391
4392         if (event)
4393                 return event->funcs->trace(iter, sym_flags, event);
4394
4395         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4396
4397         return trace_handle_return(s);
4398 }
4399
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4401 {
4402         struct trace_array *tr = iter->tr;
4403         struct trace_seq *s = &iter->seq;
4404         struct trace_entry *entry;
4405         struct trace_event *event;
4406
4407         entry = iter->ent;
4408
4409         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410                 trace_seq_printf(s, "%d %d %llu ",
4411                                  entry->pid, iter->cpu, iter->ts);
4412
4413         if (trace_seq_has_overflowed(s))
4414                 return TRACE_TYPE_PARTIAL_LINE;
4415
4416         event = ftrace_find_event(entry->type);
4417         if (event)
4418                 return event->funcs->raw(iter, 0, event);
4419
4420         trace_seq_printf(s, "%d ?\n", entry->type);
4421
4422         return trace_handle_return(s);
4423 }
4424
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4426 {
4427         struct trace_array *tr = iter->tr;
4428         struct trace_seq *s = &iter->seq;
4429         unsigned char newline = '\n';
4430         struct trace_entry *entry;
4431         struct trace_event *event;
4432
4433         entry = iter->ent;
4434
4435         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4437                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4439                 if (trace_seq_has_overflowed(s))
4440                         return TRACE_TYPE_PARTIAL_LINE;
4441         }
4442
4443         event = ftrace_find_event(entry->type);
4444         if (event) {
4445                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446                 if (ret != TRACE_TYPE_HANDLED)
4447                         return ret;
4448         }
4449
4450         SEQ_PUT_FIELD(s, newline);
4451
4452         return trace_handle_return(s);
4453 }
4454
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4456 {
4457         struct trace_array *tr = iter->tr;
4458         struct trace_seq *s = &iter->seq;
4459         struct trace_entry *entry;
4460         struct trace_event *event;
4461
4462         entry = iter->ent;
4463
4464         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465                 SEQ_PUT_FIELD(s, entry->pid);
4466                 SEQ_PUT_FIELD(s, iter->cpu);
4467                 SEQ_PUT_FIELD(s, iter->ts);
4468                 if (trace_seq_has_overflowed(s))
4469                         return TRACE_TYPE_PARTIAL_LINE;
4470         }
4471
4472         event = ftrace_find_event(entry->type);
4473         return event ? event->funcs->binary(iter, 0, event) :
4474                 TRACE_TYPE_HANDLED;
4475 }
4476
4477 int trace_empty(struct trace_iterator *iter)
4478 {
4479         struct ring_buffer_iter *buf_iter;
4480         int cpu;
4481
4482         /* If we are looking at one CPU buffer, only check that one */
4483         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484                 cpu = iter->cpu_file;
4485                 buf_iter = trace_buffer_iter(iter, cpu);
4486                 if (buf_iter) {
4487                         if (!ring_buffer_iter_empty(buf_iter))
4488                                 return 0;
4489                 } else {
4490                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491                                 return 0;
4492                 }
4493                 return 1;
4494         }
4495
4496         for_each_tracing_cpu(cpu) {
4497                 buf_iter = trace_buffer_iter(iter, cpu);
4498                 if (buf_iter) {
4499                         if (!ring_buffer_iter_empty(buf_iter))
4500                                 return 0;
4501                 } else {
4502                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4503                                 return 0;
4504                 }
4505         }
4506
4507         return 1;
4508 }
4509
4510 /*  Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4512 {
4513         struct trace_array *tr = iter->tr;
4514         unsigned long trace_flags = tr->trace_flags;
4515         enum print_line_t ret;
4516
4517         if (iter->lost_events) {
4518                 if (iter->lost_events == (unsigned long)-1)
4519                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4520                                          iter->cpu);
4521                 else
4522                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523                                          iter->cpu, iter->lost_events);
4524                 if (trace_seq_has_overflowed(&iter->seq))
4525                         return TRACE_TYPE_PARTIAL_LINE;
4526         }
4527
4528         if (iter->trace && iter->trace->print_line) {
4529                 ret = iter->trace->print_line(iter);
4530                 if (ret != TRACE_TYPE_UNHANDLED)
4531                         return ret;
4532         }
4533
4534         if (iter->ent->type == TRACE_BPUTS &&
4535                         trace_flags & TRACE_ITER_PRINTK &&
4536                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537                 return trace_print_bputs_msg_only(iter);
4538
4539         if (iter->ent->type == TRACE_BPRINT &&
4540                         trace_flags & TRACE_ITER_PRINTK &&
4541                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542                 return trace_print_bprintk_msg_only(iter);
4543
4544         if (iter->ent->type == TRACE_PRINT &&
4545                         trace_flags & TRACE_ITER_PRINTK &&
4546                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547                 return trace_print_printk_msg_only(iter);
4548
4549         if (trace_flags & TRACE_ITER_BIN)
4550                 return print_bin_fmt(iter);
4551
4552         if (trace_flags & TRACE_ITER_HEX)
4553                 return print_hex_fmt(iter);
4554
4555         if (trace_flags & TRACE_ITER_RAW)
4556                 return print_raw_fmt(iter);
4557
4558         return print_trace_fmt(iter);
4559 }
4560
4561 void trace_latency_header(struct seq_file *m)
4562 {
4563         struct trace_iterator *iter = m->private;
4564         struct trace_array *tr = iter->tr;
4565
4566         /* print nothing if the buffers are empty */
4567         if (trace_empty(iter))
4568                 return;
4569
4570         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571                 print_trace_header(m, iter);
4572
4573         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574                 print_lat_help_header(m);
4575 }
4576
4577 void trace_default_header(struct seq_file *m)
4578 {
4579         struct trace_iterator *iter = m->private;
4580         struct trace_array *tr = iter->tr;
4581         unsigned long trace_flags = tr->trace_flags;
4582
4583         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4584                 return;
4585
4586         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587                 /* print nothing if the buffers are empty */
4588                 if (trace_empty(iter))
4589                         return;
4590                 print_trace_header(m, iter);
4591                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4592                         print_lat_help_header(m);
4593         } else {
4594                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4596                                 print_func_help_header_irq(iter->array_buffer,
4597                                                            m, trace_flags);
4598                         else
4599                                 print_func_help_header(iter->array_buffer, m,
4600                                                        trace_flags);
4601                 }
4602         }
4603 }
4604
4605 static void test_ftrace_alive(struct seq_file *m)
4606 {
4607         if (!ftrace_is_dead())
4608                 return;
4609         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4610                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4611 }
4612
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 static void show_snapshot_main_help(struct seq_file *m)
4615 {
4616         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4617                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4618                     "#                      Takes a snapshot of the main buffer.\n"
4619                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4620                     "#                      (Doesn't have to be '2' works with any number that\n"
4621                     "#                       is not a '0' or '1')\n");
4622 }
4623
4624 static void show_snapshot_percpu_help(struct seq_file *m)
4625 {
4626         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4627 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4628         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4629                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4630 #else
4631         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4632                     "#                     Must use main snapshot file to allocate.\n");
4633 #endif
4634         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4635                     "#                      (Doesn't have to be '2' works with any number that\n"
4636                     "#                       is not a '0' or '1')\n");
4637 }
4638
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4640 {
4641         if (iter->tr->allocated_snapshot)
4642                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4643         else
4644                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4645
4646         seq_puts(m, "# Snapshot commands:\n");
4647         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648                 show_snapshot_main_help(m);
4649         else
4650                 show_snapshot_percpu_help(m);
4651 }
4652 #else
4653 /* Should never be called */
4654 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4655 #endif
4656
4657 static int s_show(struct seq_file *m, void *v)
4658 {
4659         struct trace_iterator *iter = v;
4660         int ret;
4661
4662         if (iter->ent == NULL) {
4663                 if (iter->tr) {
4664                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4665                         seq_puts(m, "#\n");
4666                         test_ftrace_alive(m);
4667                 }
4668                 if (iter->snapshot && trace_empty(iter))
4669                         print_snapshot_help(m, iter);
4670                 else if (iter->trace && iter->trace->print_header)
4671                         iter->trace->print_header(m);
4672                 else
4673                         trace_default_header(m);
4674
4675         } else if (iter->leftover) {
4676                 /*
4677                  * If we filled the seq_file buffer earlier, we
4678                  * want to just show it now.
4679                  */
4680                 ret = trace_print_seq(m, &iter->seq);
4681
4682                 /* ret should this time be zero, but you never know */
4683                 iter->leftover = ret;
4684
4685         } else {
4686                 print_trace_line(iter);
4687                 ret = trace_print_seq(m, &iter->seq);
4688                 /*
4689                  * If we overflow the seq_file buffer, then it will
4690                  * ask us for this data again at start up.
4691                  * Use that instead.
4692                  *  ret is 0 if seq_file write succeeded.
4693                  *        -1 otherwise.
4694                  */
4695                 iter->leftover = ret;
4696         }
4697
4698         return 0;
4699 }
4700
4701 /*
4702  * Should be used after trace_array_get(), trace_types_lock
4703  * ensures that i_cdev was already initialized.
4704  */
4705 static inline int tracing_get_cpu(struct inode *inode)
4706 {
4707         if (inode->i_cdev) /* See trace_create_cpu_file() */
4708                 return (long)inode->i_cdev - 1;
4709         return RING_BUFFER_ALL_CPUS;
4710 }
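/*
 * Editor's note (hedged sketch, illustrative names): the encode side lives
 * in trace_create_cpu_file(), which is outside this excerpt. The idea is to
 * stash "cpu + 1" in i_cdev when a per-CPU file is created, so that a NULL
 * i_cdev still means "no CPU recorded" and decodes to RING_BUFFER_ALL_CPUS
 * above. Roughly:
 */
static inline void example_tag_inode_with_cpu(struct inode *inode, long cpu)
{
        /* Offset by one so that CPU 0 is distinguishable from "not set". */
        inode->i_cdev = (void *)(cpu + 1);
}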
4711
4712 static const struct seq_operations tracer_seq_ops = {
4713         .start          = s_start,
4714         .next           = s_next,
4715         .stop           = s_stop,
4716         .show           = s_show,
4717 };
4718
4719 static struct trace_iterator *
4720 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4721 {
4722         struct trace_array *tr = inode->i_private;
4723         struct trace_iterator *iter;
4724         int cpu;
4725
4726         if (tracing_disabled)
4727                 return ERR_PTR(-ENODEV);
4728
4729         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4730         if (!iter)
4731                 return ERR_PTR(-ENOMEM);
4732
4733         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4734                                     GFP_KERNEL);
4735         if (!iter->buffer_iter)
4736                 goto release;
4737
4738         /*
4739          * trace_find_next_entry() may need to save off iter->ent.
4740          * It will place it into the iter->temp buffer. As most
4741          * events are less than 128 bytes, allocate a buffer of that size.
4742          * If one is greater, then trace_find_next_entry() will
4743          * allocate a new buffer to adjust for the bigger iter->ent.
4744          * It's not critical if it fails to get allocated here.
4745          */
4746         iter->temp = kmalloc(128, GFP_KERNEL);
4747         if (iter->temp)
4748                 iter->temp_size = 128;
4749
4750         /*
4751          * trace_event_printf() may need to modify given format
4752          * string to replace %p with %px so that it shows real address
4753          * instead of hash value. However, that is only needed for event
4754          * tracing; other tracers may not need it. Defer the allocation
4755          * until it is needed.
4756          */
4757         iter->fmt = NULL;
4758         iter->fmt_size = 0;
4759
4760         /*
4761          * We make a copy of the current tracer to avoid concurrent
4762          * changes on it while we are reading.
4763          */
4764         mutex_lock(&trace_types_lock);
4765         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4766         if (!iter->trace)
4767                 goto fail;
4768
4769         *iter->trace = *tr->current_trace;
4770
4771         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4772                 goto fail;
4773
4774         iter->tr = tr;
4775
4776 #ifdef CONFIG_TRACER_MAX_TRACE
4777         /* Currently only the top directory has a snapshot */
4778         if (tr->current_trace->print_max || snapshot)
4779                 iter->array_buffer = &tr->max_buffer;
4780         else
4781 #endif
4782                 iter->array_buffer = &tr->array_buffer;
4783         iter->snapshot = snapshot;
4784         iter->pos = -1;
4785         iter->cpu_file = tracing_get_cpu(inode);
4786         mutex_init(&iter->mutex);
4787
4788         /* Notify the tracer early, before we stop tracing. */
4789         if (iter->trace->open)
4790                 iter->trace->open(iter);
4791
4792         /* Annotate start of buffers if we had overruns */
4793         if (ring_buffer_overruns(iter->array_buffer->buffer))
4794                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4795
4796         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4797         if (trace_clocks[tr->clock_id].in_ns)
4798                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4799
4800         /*
4801          * If pause-on-trace is enabled, then stop the trace while
4802          * dumping, unless this is the "snapshot" file
4803          */
4804         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4805                 tracing_stop_tr(tr);
4806
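        /*
         * Editor's note: the iterator setup below is deliberately two-phased.
         * ring_buffer_read_prepare() runs for every CPU first, a single
         * ring_buffer_read_prepare_sync() then performs the (relatively
         * expensive) synchronization once for all of them, and only after
         * that does ring_buffer_read_start() begin each iterator.
         */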
4807         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4808                 for_each_tracing_cpu(cpu) {
4809                         iter->buffer_iter[cpu] =
4810                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4811                                                          cpu, GFP_KERNEL);
4812                 }
4813                 ring_buffer_read_prepare_sync();
4814                 for_each_tracing_cpu(cpu) {
4815                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4816                         tracing_iter_reset(iter, cpu);
4817                 }
4818         } else {
4819                 cpu = iter->cpu_file;
4820                 iter->buffer_iter[cpu] =
4821                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4822                                                  cpu, GFP_KERNEL);
4823                 ring_buffer_read_prepare_sync();
4824                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                 tracing_iter_reset(iter, cpu);
4826         }
4827
4828         mutex_unlock(&trace_types_lock);
4829
4830         return iter;
4831
4832  fail:
4833         mutex_unlock(&trace_types_lock);
4834         kfree(iter->trace);
4835         kfree(iter->temp);
4836         kfree(iter->buffer_iter);
4837 release:
4838         seq_release_private(inode, file);
4839         return ERR_PTR(-ENOMEM);
4840 }
4841
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4843 {
4844         int ret;
4845
4846         ret = tracing_check_open_get_tr(NULL);
4847         if (ret)
4848                 return ret;
4849
4850         filp->private_data = inode->i_private;
4851         return 0;
4852 }
4853
4854 bool tracing_is_disabled(void)
4855 {
4856         return tracing_disabled;
4857 }
4858
4859 /*
4860  * Open and update trace_array ref count.
4861  * Must have the current trace_array passed to it.
4862  */
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866         int ret;
4867
4868         ret = tracing_check_open_get_tr(tr);
4869         if (ret)
4870                 return ret;
4871
4872         filp->private_data = inode->i_private;
4873
4874         return 0;
4875 }
4876
4877 static int tracing_mark_open(struct inode *inode, struct file *filp)
4878 {
4879         stream_open(inode, filp);
4880         return tracing_open_generic_tr(inode, filp);
4881 }
4882
4883 static int tracing_release(struct inode *inode, struct file *file)
4884 {
4885         struct trace_array *tr = inode->i_private;
4886         struct seq_file *m = file->private_data;
4887         struct trace_iterator *iter;
4888         int cpu;
4889
4890         if (!(file->f_mode & FMODE_READ)) {
4891                 trace_array_put(tr);
4892                 return 0;
4893         }
4894
4895         /* Writes do not use seq_file */
4896         iter = m->private;
4897         mutex_lock(&trace_types_lock);
4898
4899         for_each_tracing_cpu(cpu) {
4900                 if (iter->buffer_iter[cpu])
4901                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4902         }
4903
4904         if (iter->trace && iter->trace->close)
4905                 iter->trace->close(iter);
4906
4907         if (!iter->snapshot && tr->stop_count)
4908                 /* reenable tracing if it was previously enabled */
4909                 tracing_start_tr(tr);
4910
4911         __trace_array_put(tr);
4912
4913         mutex_unlock(&trace_types_lock);
4914
4915         mutex_destroy(&iter->mutex);
4916         free_cpumask_var(iter->started);
4917         kfree(iter->fmt);
4918         kfree(iter->temp);
4919         kfree(iter->trace);
4920         kfree(iter->buffer_iter);
4921         seq_release_private(inode, file);
4922
4923         return 0;
4924 }
4925
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4927 {
4928         struct trace_array *tr = inode->i_private;
4929
4930         trace_array_put(tr);
4931         return 0;
4932 }
4933
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4935 {
4936         struct trace_array *tr = inode->i_private;
4937
4938         trace_array_put(tr);
4939
4940         return single_release(inode, file);
4941 }
4942
4943 static int tracing_open(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946         struct trace_iterator *iter;
4947         int ret;
4948
4949         ret = tracing_check_open_get_tr(tr);
4950         if (ret)
4951                 return ret;
4952
4953         /* If this file was open for write, then erase contents */
4954         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955                 int cpu = tracing_get_cpu(inode);
4956                 struct array_buffer *trace_buf = &tr->array_buffer;
4957
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959                 if (tr->current_trace->print_max)
4960                         trace_buf = &tr->max_buffer;
4961 #endif
4962
4963                 if (cpu == RING_BUFFER_ALL_CPUS)
4964                         tracing_reset_online_cpus(trace_buf);
4965                 else
4966                         tracing_reset_cpu(trace_buf, cpu);
4967         }
4968
4969         if (file->f_mode & FMODE_READ) {
4970                 iter = __tracing_open(inode, file, false);
4971                 if (IS_ERR(iter))
4972                         ret = PTR_ERR(iter);
4973                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4975         }
4976
4977         if (ret < 0)
4978                 trace_array_put(tr);
4979
4980         return ret;
4981 }
4982
4983 /*
4984  * Some tracers are not suitable for instance buffers.
4985  * A tracer is always available for the global array (toplevel)
4986  * or if it explicitly states that it is.
4987  */
4988 static bool
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4990 {
4991         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4992 }
4993
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4997 {
4998         while (t && !trace_ok_for_array(t, tr))
4999                 t = t->next;
5000
5001         return t;
5002 }
5003
5004 static void *
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5006 {
5007         struct trace_array *tr = m->private;
5008         struct tracer *t = v;
5009
5010         (*pos)++;
5011
5012         if (t)
5013                 t = get_tracer_for_array(tr, t->next);
5014
5015         return t;
5016 }
5017
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5019 {
5020         struct trace_array *tr = m->private;
5021         struct tracer *t;
5022         loff_t l = 0;
5023
5024         mutex_lock(&trace_types_lock);
5025
5026         t = get_tracer_for_array(tr, trace_types);
5027         for (; t && l < *pos; t = t_next(m, t, &l))
5028                 ;
5029
5030         return t;
5031 }
5032
5033 static void t_stop(struct seq_file *m, void *p)
5034 {
5035         mutex_unlock(&trace_types_lock);
5036 }
5037
5038 static int t_show(struct seq_file *m, void *v)
5039 {
5040         struct tracer *t = v;
5041
5042         if (!t)
5043                 return 0;
5044
5045         seq_puts(m, t->name);
5046         if (t->next)
5047                 seq_putc(m, ' ');
5048         else
5049                 seq_putc(m, '\n');
5050
5051         return 0;
5052 }
5053
5054 static const struct seq_operations show_traces_seq_ops = {
5055         .start          = t_start,
5056         .next           = t_next,
5057         .stop           = t_stop,
5058         .show           = t_show,
5059 };
5060
5061 static int show_traces_open(struct inode *inode, struct file *file)
5062 {
5063         struct trace_array *tr = inode->i_private;
5064         struct seq_file *m;
5065         int ret;
5066
5067         ret = tracing_check_open_get_tr(tr);
5068         if (ret)
5069                 return ret;
5070
5071         ret = seq_open(file, &show_traces_seq_ops);
5072         if (ret) {
5073                 trace_array_put(tr);
5074                 return ret;
5075         }
5076
5077         m = file->private_data;
5078         m->private = tr;
5079
5080         return 0;
5081 }
5082
5083 static int show_traces_release(struct inode *inode, struct file *file)
5084 {
5085         struct trace_array *tr = inode->i_private;
5086
5087         trace_array_put(tr);
5088         return seq_release(inode, file);
5089 }
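/*
 * Editor's note (usage illustration): this seq_file backs the
 * "available_tracers" file, so reading it lists the tracers this instance
 * may use on one space-separated line, e.g. (output depends on the kernel
 * configuration):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 */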
5090
5091 static ssize_t
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093                    size_t count, loff_t *ppos)
5094 {
5095         return count;
5096 }
5097
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5099 {
5100         int ret;
5101
5102         if (file->f_mode & FMODE_READ)
5103                 ret = seq_lseek(file, offset, whence);
5104         else
5105                 file->f_pos = ret = 0;
5106
5107         return ret;
5108 }
5109
5110 static const struct file_operations tracing_fops = {
5111         .open           = tracing_open,
5112         .read           = seq_read,
5113         .write          = tracing_write_stub,
5114         .llseek         = tracing_lseek,
5115         .release        = tracing_release,
5116 };
5117
5118 static const struct file_operations show_traces_fops = {
5119         .open           = show_traces_open,
5120         .read           = seq_read,
5121         .llseek         = seq_lseek,
5122         .release        = show_traces_release,
5123 };
5124
5125 static ssize_t
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127                      size_t count, loff_t *ppos)
5128 {
5129         struct trace_array *tr = file_inode(filp)->i_private;
5130         char *mask_str;
5131         int len;
5132
5133         len = snprintf(NULL, 0, "%*pb\n",
5134                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135         mask_str = kmalloc(len, GFP_KERNEL);
5136         if (!mask_str)
5137                 return -ENOMEM;
5138
5139         len = snprintf(mask_str, len, "%*pb\n",
5140                        cpumask_pr_args(tr->tracing_cpumask));
5141         if (len >= count) {
5142                 count = -EINVAL;
5143                 goto out_err;
5144         }
5145         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5146
5147 out_err:
5148         kfree(mask_str);
5149
5150         return count;
5151 }
5152
5153 int tracing_set_cpumask(struct trace_array *tr,
5154                         cpumask_var_t tracing_cpumask_new)
5155 {
5156         int cpu;
5157
5158         if (!tr)
5159                 return -EINVAL;
5160
5161         local_irq_disable();
5162         arch_spin_lock(&tr->max_lock);
5163         for_each_tracing_cpu(cpu) {
5164                 /*
5165                  * Increase/decrease the disabled counter if we are
5166                  * about to flip a bit in the cpumask:
5167                  */
5168                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5172                 }
5173                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5177                 }
5178         }
5179         arch_spin_unlock(&tr->max_lock);
5180         local_irq_enable();
5181
5182         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5183
5184         return 0;
5185 }
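/*
 * Editor's note (usage illustration): tracing_set_cpumask() is normally
 * reached by writing a hex CPU mask to the "tracing_cpumask" file, e.g. to
 * restrict tracing to CPUs 0-3:
 *
 *   # echo f > /sys/kernel/tracing/tracing_cpumask
 *
 * Reading the file back prints the current mask in the same %*pb format
 * used by tracing_cpumask_read() above.
 */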
5186
5187 static ssize_t
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189                       size_t count, loff_t *ppos)
5190 {
5191         struct trace_array *tr = file_inode(filp)->i_private;
5192         cpumask_var_t tracing_cpumask_new;
5193         int err;
5194
5195         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5196                 return -ENOMEM;
5197
5198         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5199         if (err)
5200                 goto err_free;
5201
5202         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5203         if (err)
5204                 goto err_free;
5205
5206         free_cpumask_var(tracing_cpumask_new);
5207
5208         return count;
5209
5210 err_free:
5211         free_cpumask_var(tracing_cpumask_new);
5212
5213         return err;
5214 }
5215
5216 static const struct file_operations tracing_cpumask_fops = {
5217         .open           = tracing_open_generic_tr,
5218         .read           = tracing_cpumask_read,
5219         .write          = tracing_cpumask_write,
5220         .release        = tracing_release_generic_tr,
5221         .llseek         = generic_file_llseek,
5222 };
5223
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5225 {
5226         struct tracer_opt *trace_opts;
5227         struct trace_array *tr = m->private;
5228         u32 tracer_flags;
5229         int i;
5230
5231         mutex_lock(&trace_types_lock);
5232         tracer_flags = tr->current_trace->flags->val;
5233         trace_opts = tr->current_trace->flags->opts;
5234
5235         for (i = 0; trace_options[i]; i++) {
5236                 if (tr->trace_flags & (1 << i))
5237                         seq_printf(m, "%s\n", trace_options[i]);
5238                 else
5239                         seq_printf(m, "no%s\n", trace_options[i]);
5240         }
5241
5242         for (i = 0; trace_opts[i].name; i++) {
5243                 if (tracer_flags & trace_opts[i].bit)
5244                         seq_printf(m, "%s\n", trace_opts[i].name);
5245                 else
5246                         seq_printf(m, "no%s\n", trace_opts[i].name);
5247         }
5248         mutex_unlock(&trace_types_lock);
5249
5250         return 0;
5251 }
5252
5253 static int __set_tracer_option(struct trace_array *tr,
5254                                struct tracer_flags *tracer_flags,
5255                                struct tracer_opt *opts, int neg)
5256 {
5257         struct tracer *trace = tracer_flags->trace;
5258         int ret;
5259
5260         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5261         if (ret)
5262                 return ret;
5263
5264         if (neg)
5265                 tracer_flags->val &= ~opts->bit;
5266         else
5267                 tracer_flags->val |= opts->bit;
5268         return 0;
5269 }
5270
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5273 {
5274         struct tracer *trace = tr->current_trace;
5275         struct tracer_flags *tracer_flags = trace->flags;
5276         struct tracer_opt *opts = NULL;
5277         int i;
5278
5279         for (i = 0; tracer_flags->opts[i].name; i++) {
5280                 opts = &tracer_flags->opts[i];
5281
5282                 if (strcmp(cmp, opts->name) == 0)
5283                         return __set_tracer_option(tr, trace->flags, opts, neg);
5284         }
5285
5286         return -EINVAL;
5287 }
5288
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5291 {
5292         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5293                 return -1;
5294
5295         return 0;
5296 }
5297
5298 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5299 {
5300         int *map;
5301
5302         if ((mask == TRACE_ITER_RECORD_TGID) ||
5303             (mask == TRACE_ITER_RECORD_CMD))
5304                 lockdep_assert_held(&event_mutex);
5305
5306         /* do nothing if flag is already set */
5307         if (!!(tr->trace_flags & mask) == !!enabled)
5308                 return 0;
5309
5310         /* Give the tracer a chance to approve the change */
5311         if (tr->current_trace->flag_changed)
5312                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5313                         return -EINVAL;
5314
5315         if (enabled)
5316                 tr->trace_flags |= mask;
5317         else
5318                 tr->trace_flags &= ~mask;
5319
5320         if (mask == TRACE_ITER_RECORD_CMD)
5321                 trace_event_enable_cmd_record(enabled);
5322
5323         if (mask == TRACE_ITER_RECORD_TGID) {
5324                 if (!tgid_map) {
5325                         tgid_map_max = pid_max;
5326                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5327                                        GFP_KERNEL);
5328
5329                         /*
5330                          * Pairs with smp_load_acquire() in
5331                          * trace_find_tgid_ptr() to ensure that if it observes
5332                          * the tgid_map we just allocated then it also observes
5333                          * the corresponding tgid_map_max value.
5334                          */
5335                         smp_store_release(&tgid_map, map);
5336                 }
5337                 if (!tgid_map) {
5338                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5339                         return -ENOMEM;
5340                 }
5341
5342                 trace_event_enable_tgid_record(enabled);
5343         }
5344
5345         if (mask == TRACE_ITER_EVENT_FORK)
5346                 trace_event_follow_fork(tr, enabled);
5347
5348         if (mask == TRACE_ITER_FUNC_FORK)
5349                 ftrace_pid_follow_fork(tr, enabled);
5350
5351         if (mask == TRACE_ITER_OVERWRITE) {
5352                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5353 #ifdef CONFIG_TRACER_MAX_TRACE
5354                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5355 #endif
5356         }
5357
5358         if (mask == TRACE_ITER_PRINTK) {
5359                 trace_printk_start_stop_comm(enabled);
5360                 trace_printk_control(enabled);
5361         }
5362
5363         return 0;
5364 }
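/*
 * Editor's note (hedged sketch, illustrative name): the acquire side that
 * the tgid_map comment above refers to is trace_find_tgid_ptr(), defined
 * elsewhere in this file (not in this excerpt). Its essential shape is
 * roughly the following; the point is that loading the pointer with acquire
 * semantics guarantees the reader also sees the tgid_map_max written before
 * the release store.
 */
static int *example_find_tgid_ptr(int pid)
{
        /* Pairs with smp_store_release(&tgid_map, map) above. */
        int *map = smp_load_acquire(&tgid_map);

        if (unlikely(!map || pid > tgid_map_max))
                return NULL;

        return &map[pid];
}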
5365
5366 int trace_set_options(struct trace_array *tr, char *option)
5367 {
5368         char *cmp;
5369         int neg = 0;
5370         int ret;
5371         size_t orig_len = strlen(option);
5372         int len;
5373
5374         cmp = strstrip(option);
5375
5376         len = str_has_prefix(cmp, "no");
5377         if (len)
5378                 neg = 1;
5379
5380         cmp += len;
5381
5382         mutex_lock(&event_mutex);
5383         mutex_lock(&trace_types_lock);
5384
5385         ret = match_string(trace_options, -1, cmp);
5386         /* If no option could be set, test the specific tracer options */
5387         if (ret < 0)
5388                 ret = set_tracer_option(tr, cmp, neg);
5389         else
5390                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5391
5392         mutex_unlock(&trace_types_lock);
5393         mutex_unlock(&event_mutex);
5394
5395         /*
5396          * If the first trailing whitespace is replaced with '\0' by strstrip,
5397          * turn it back into a space.
5398          */
5399         if (orig_len > strlen(option))
5400                 option[strlen(option)] = ' ';
5401
5402         return ret;
5403 }
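/*
 * Editor's note (usage illustration): trace_set_options() backs writes to
 * the "trace_options" file as well as the boot-time option string applied
 * by apply_trace_boot_options() below. A flag is set by writing its name
 * and cleared by prefixing it with "no", e.g.:
 *
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *   # echo nosym-offset > /sys/kernel/tracing/trace_options
 */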
5404
5405 static void __init apply_trace_boot_options(void)
5406 {
5407         char *buf = trace_boot_options_buf;
5408         char *option;
5409
5410         while (true) {
5411                 option = strsep(&buf, ",");
5412
5413                 if (!option)
5414                         break;
5415
5416                 if (*option)
5417                         trace_set_options(&global_trace, option);
5418
5419                 /* Put back the comma to allow this to be called again */
5420                 if (buf)
5421                         *(buf - 1) = ',';
5422         }
5423 }
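/*
 * Editor's note (usage illustration): trace_boot_options_buf is filled from
 * the kernel command line (the trace_options= parameter in mainline), so the
 * loop above lets options be set before user space is up, e.g.:
 *
 *   trace_options=sym-offset,stacktrace
 *
 * The comma is restored after each strsep() so the buffer can be walked
 * again on a later call.
 */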
5424
5425 static ssize_t
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427                         size_t cnt, loff_t *ppos)
5428 {
5429         struct seq_file *m = filp->private_data;
5430         struct trace_array *tr = m->private;
5431         char buf[64];
5432         int ret;
5433
5434         if (cnt >= sizeof(buf))
5435                 return -EINVAL;
5436
5437         if (copy_from_user(buf, ubuf, cnt))
5438                 return -EFAULT;
5439
5440         buf[cnt] = 0;
5441
5442         ret = trace_set_options(tr, buf);
5443         if (ret < 0)
5444                 return ret;
5445
5446         *ppos += cnt;
5447
5448         return cnt;
5449 }
5450
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5452 {
5453         struct trace_array *tr = inode->i_private;
5454         int ret;
5455
5456         ret = tracing_check_open_get_tr(tr);
5457         if (ret)
5458                 return ret;
5459
5460         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5461         if (ret < 0)
5462                 trace_array_put(tr);
5463
5464         return ret;
5465 }
5466
5467 static const struct file_operations tracing_iter_fops = {
5468         .open           = tracing_trace_options_open,
5469         .read           = seq_read,
5470         .llseek         = seq_lseek,
5471         .release        = tracing_single_release_tr,
5472         .write          = tracing_trace_options_write,
5473 };
5474
5475 static const char readme_msg[] =
5476         "tracing mini-HOWTO:\n\n"
5477         "# echo 0 > tracing_on : quick way to disable tracing\n"
5478         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5479         " Important files:\n"
5480         "  trace\t\t\t- The static contents of the buffer\n"
5481         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5482         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5483         "  current_tracer\t- function and latency tracers\n"
5484         "  available_tracers\t- list of configured tracers for current_tracer\n"
5485         "  error_log\t- error log for failed commands (that support it)\n"
5486         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5487         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5488         "  trace_clock\t\t- change the clock used to order events\n"
5489         "       local:   Per cpu clock but may not be synced across CPUs\n"
5490         "      global:   Synced across CPUs but slows tracing down.\n"
5491         "     counter:   Not a clock, but just an increment\n"
5492         "      uptime:   Jiffy counter from time of boot\n"
5493         "        perf:   Same clock that perf events use\n"
5494 #ifdef CONFIG_X86_64
5495         "     x86-tsc:   TSC cycle counter\n"
5496 #endif
5497         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5498         "       delta:   Delta difference against a buffer-wide timestamp\n"
5499         "    absolute:   Absolute (standalone) timestamp\n"
5500         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5501         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5502         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5503         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5504         "\t\t\t  Remove sub-buffer with rmdir\n"
5505         "  trace_options\t\t- Set format or modify how tracing happens\n"
5506         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5507         "\t\t\t  option name\n"
5508         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510         "\n  available_filter_functions - list of functions that can be filtered on\n"
5511         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5512         "\t\t\t  functions\n"
5513         "\t     accepts: func_full_name or glob-matching-pattern\n"
5514         "\t     modules: Can select a group via module\n"
5515         "\t      Format: :mod:<module-name>\n"
5516         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5517         "\t    triggers: a command to perform when function is hit\n"
5518         "\t      Format: <function>:<trigger>[:count]\n"
5519         "\t     trigger: traceon, traceoff\n"
5520         "\t\t      enable_event:<system>:<event>\n"
5521         "\t\t      disable_event:<system>:<event>\n"
5522 #ifdef CONFIG_STACKTRACE
5523         "\t\t      stacktrace\n"
5524 #endif
5525 #ifdef CONFIG_TRACER_SNAPSHOT
5526         "\t\t      snapshot\n"
5527 #endif
5528         "\t\t      dump\n"
5529         "\t\t      cpudump\n"
5530         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5531         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5532         "\t     The first one will disable tracing every time do_fault is hit\n"
5533         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5534         "\t       The first time do_trap is hit and it disables tracing, the\n"
5535         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5536         "\t       the counter will not decrement. It only decrements when the\n"
5537         "\t       trigger did work\n"
5538         "\t     To remove a trigger without a count:\n"
5539         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5540         "\t     To remove a trigger with a count:\n"
5541         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5542         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5543         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5544         "\t    modules: Can select a group via module command :mod:\n"
5545         "\t    Does not accept triggers\n"
5546 #endif /* CONFIG_DYNAMIC_FTRACE */
5547 #ifdef CONFIG_FUNCTION_TRACER
5548         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5549         "\t\t    (function)\n"
5550         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5551         "\t\t    (function)\n"
5552 #endif
5553 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5554         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5555         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5556         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5560         "\t\t\t  snapshot buffer. Read the contents for more\n"
5561         "\t\t\t  information\n"
5562 #endif
5563 #ifdef CONFIG_STACK_TRACER
5564         "  stack_trace\t\t- Shows the max stack trace when active\n"
5565         "  stack_max_size\t- Shows current max stack size that was traced\n"
5566         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5567         "\t\t\t  new trace)\n"
5568 #ifdef CONFIG_DYNAMIC_FTRACE
5569         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5570         "\t\t\t  traces\n"
5571 #endif
5572 #endif /* CONFIG_STACK_TRACER */
5573 #ifdef CONFIG_DYNAMIC_EVENTS
5574         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5575         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5576 #endif
5577 #ifdef CONFIG_KPROBE_EVENTS
5578         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5579         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5580 #endif
5581 #ifdef CONFIG_UPROBE_EVENTS
5582         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5583         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5584 #endif
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586         "\t  accepts: event-definitions (one definition per line)\n"
5587         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5588         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5591 #endif
5592         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5593         "\t           -:[<group>/][<event>]\n"
5594 #ifdef CONFIG_KPROBE_EVENTS
5595         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5596         "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5597 #endif
5598 #ifdef CONFIG_UPROBE_EVENTS
5599         "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5600 #endif
5601         "\t     args: <name>=fetcharg[:type]\n"
5602         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5603 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5604         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5605 #else
5606         "\t           $stack<index>, $stack, $retval, $comm,\n"
5607 #endif
5608         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5609         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5610         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5611         "\t           <type>\\[<array-size>\\]\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613         "\t    field: <stype> <name>;\n"
5614         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5615         "\t           [unsigned] char/int/long\n"
5616 #endif
5617         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5618         "\t            of the <attached-group>/<attached-event>.\n"
5619 #endif
5620         "  events/\t\t- Directory containing all trace event subsystems:\n"
5621         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5623         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5624         "\t\t\t  events\n"
5625         "      filter\t\t- If set, only events passing filter are traced\n"
5626         "  events/<system>/<event>/\t- Directory containing control files for\n"
5627         "\t\t\t  <event>:\n"
5628         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629         "      filter\t\t- If set, only events passing filter are traced\n"
5630         "      trigger\t\t- If set, a command to perform when event is hit\n"
5631         "\t    Format: <trigger>[:count][if <filter>]\n"
5632         "\t   trigger: traceon, traceoff\n"
5633         "\t            enable_event:<system>:<event>\n"
5634         "\t            disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636         "\t            enable_hist:<system>:<event>\n"
5637         "\t            disable_hist:<system>:<event>\n"
5638 #endif
5639 #ifdef CONFIG_STACKTRACE
5640         "\t\t    stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643         "\t\t    snapshot\n"
5644 #endif
5645 #ifdef CONFIG_HIST_TRIGGERS
5646         "\t\t    hist (see below)\n"
5647 #endif
5648         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5649         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5650         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651         "\t                  events/block/block_unplug/trigger\n"
5652         "\t   The first disables tracing every time block_unplug is hit.\n"
5653         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5654         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5655         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5656         "\t   Like function triggers, the counter is only decremented if it\n"
5657         "\t    enabled or disabled tracing.\n"
5658         "\t   To remove a trigger without a count:\n"
5659         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5660         "\t   To remove a trigger with a count:\n"
5661         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5662         "\t   Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5666         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667         "\t            [:values=<field1[,field2,...]>]\n"
5668         "\t            [:sort=<field1[,field2,...]>]\n"
5669         "\t            [:size=#entries]\n"
5670         "\t            [:pause][:continue][:clear]\n"
5671         "\t            [:name=histname1]\n"
5672         "\t            [:<handler>.<action>]\n"
5673         "\t            [if <filter>]\n\n"
5674         "\t    Note, special fields can be used as well:\n"
5675         "\t            common_timestamp - to record current timestamp\n"
5676         "\t            common_cpu - to record the CPU the event happened on\n"
5677         "\n"
5678         "\t    A hist trigger variable can be:\n"
5679         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5680         "\t        - a reference to another variable e.g. y=$x,\n"
5681         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5682         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5683         "\n"
5684         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5685         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5686         "\t    variable reference, field or numeric literal.\n"
5687         "\n"
5688         "\t    When a matching event is hit, an entry is added to a hash\n"
5689         "\t    table using the key(s) and value(s) named, and the value of a\n"
5690         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5691         "\t    correspond to fields in the event's format description.  Keys\n"
5692         "\t    can be any field, or the special string 'stacktrace'.\n"
5693         "\t    Compound keys consisting of up to two fields can be specified\n"
5694         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5695         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5696         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5697         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5698         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5699         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5700         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5701         "\t    its histogram data will be shared with other triggers of the\n"
5702         "\t    same name, and trigger hits will update this common data.\n\n"
5703         "\t    Reading the 'hist' file for the event will dump the hash\n"
5704         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5705         "\t    triggers attached to an event, there will be a table for each\n"
5706         "\t    trigger in the output.  The table displayed for a named\n"
5707         "\t    trigger will be the same as any other instance having the\n"
5708         "\t    same name.  The default format used to display a given field\n"
5709         "\t    can be modified by appending any of the following modifiers\n"
5710         "\t    to the field name, as applicable:\n\n"
5711         "\t            .hex        display a number as a hex value\n"
5712         "\t            .sym        display an address as a symbol\n"
5713         "\t            .sym-offset display an address as a symbol and offset\n"
5714         "\t            .execname   display a common_pid as a program name\n"
5715         "\t            .syscall    display a syscall id as a syscall name\n"
5716         "\t            .log2       display log2 value rather than raw number\n"
5717         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5718         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5719         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5720         "\t    trigger or to start a hist trigger but not log any events\n"
5721         "\t    until told to do so.  'continue' can be used to start or\n"
5722         "\t    restart a paused hist trigger.\n\n"
5723         "\t    The 'clear' parameter will clear the contents of a running\n"
5724         "\t    hist trigger and leave its current paused/active state\n"
5725         "\t    unchanged.\n\n"
5726         "\t    The enable_hist and disable_hist triggers can be used to\n"
5727         "\t    have one event conditionally start and stop another event's\n"
5728         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5729         "\t    the enable_event and disable_event triggers.\n\n"
5730         "\t    Hist trigger handlers and actions are executed whenever a\n"
5731         "\t    histogram entry is added or updated.  They take the form:\n\n"
5732         "\t        <handler>.<action>\n\n"
5733         "\t    The available handlers are:\n\n"
5734         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5735         "\t        onmax(var)               - invoke if var exceeds current max\n"
5736         "\t        onchange(var)            - invoke action if var changes\n\n"
5737         "\t    The available actions are:\n\n"
5738         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5739         "\t        save(field,...)                      - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745         "\t  Write into this file to define/undefine new synthetic events.\n"
5746         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
5750
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753                        size_t cnt, loff_t *ppos)
5754 {
5755         return simple_read_from_buffer(ubuf, cnt, ppos,
5756                                         readme_msg, strlen(readme_msg));
5757 }
5758
5759 static const struct file_operations tracing_readme_fops = {
5760         .open           = tracing_open_generic,
5761         .read           = tracing_readme_read,
5762         .llseek         = generic_file_llseek,
5763 };
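/*
 * Editor's note: readme_msg above is exposed read-only through tracefs (as
 * the "README" file in mainline), so the quickest way to see the help text
 * matching a running kernel is simply:
 *
 *   # cat /sys/kernel/tracing/README
 */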
5764
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767         int pid = ++(*pos);
5768
5769         return trace_find_tgid_ptr(pid);
5770 }
5771
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5773 {
5774         int pid = *pos;
5775
5776         return trace_find_tgid_ptr(pid);
5777 }
5778
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5780 {
5781 }
5782
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5784 {
5785         int *entry = (int *)v;
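        /*
         * Editor's note: tgid_map is indexed by pid, so the pid is recovered
         * below as the element's offset from the start of the array.
         */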
5786         int pid = entry - tgid_map;
5787         int tgid = *entry;
5788
5789         if (tgid == 0)
5790                 return SEQ_SKIP;
5791
5792         seq_printf(m, "%d %d\n", pid, tgid);
5793         return 0;
5794 }
5795
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797         .start          = saved_tgids_start,
5798         .stop           = saved_tgids_stop,
5799         .next           = saved_tgids_next,
5800         .show           = saved_tgids_show,
5801 };
5802
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5804 {
5805         int ret;
5806
5807         ret = tracing_check_open_get_tr(NULL);
5808         if (ret)
5809                 return ret;
5810
5811         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5812 }
5813
5814
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816         .open           = tracing_saved_tgids_open,
5817         .read           = seq_read,
5818         .llseek         = seq_lseek,
5819         .release        = seq_release,
5820 };
5821
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5823 {
5824         unsigned int *ptr = v;
5825
5826         if (*pos || m->count)
5827                 ptr++;
5828
5829         (*pos)++;
5830
5831         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5832              ptr++) {
5833                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5834                         continue;
5835
5836                 return ptr;
5837         }
5838
5839         return NULL;
5840 }
5841
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5843 {
5844         void *v;
5845         loff_t l = 0;
5846
5847         preempt_disable();
5848         arch_spin_lock(&trace_cmdline_lock);
5849
5850         v = &savedcmd->map_cmdline_to_pid[0];
5851         while (l <= *pos) {
5852                 v = saved_cmdlines_next(m, v, &l);
5853                 if (!v)
5854                         return NULL;
5855         }
5856
5857         return v;
5858 }
5859
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5861 {
5862         arch_spin_unlock(&trace_cmdline_lock);
5863         preempt_enable();
5864 }
5865
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5867 {
5868         char buf[TASK_COMM_LEN];
5869         unsigned int *pid = v;
5870
5871         __trace_find_cmdline(*pid, buf);
5872         seq_printf(m, "%d %s\n", *pid, buf);
5873         return 0;
5874 }
5875
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877         .start          = saved_cmdlines_start,
5878         .next           = saved_cmdlines_next,
5879         .stop           = saved_cmdlines_stop,
5880         .show           = saved_cmdlines_show,
5881 };
5882
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5884 {
5885         int ret;
5886
5887         ret = tracing_check_open_get_tr(NULL);
5888         if (ret)
5889                 return ret;
5890
5891         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5892 }
5893
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895         .open           = tracing_saved_cmdlines_open,
5896         .read           = seq_read,
5897         .llseek         = seq_lseek,
5898         .release        = seq_release,
5899 };
5900
5901 static ssize_t
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903                                  size_t cnt, loff_t *ppos)
5904 {
5905         char buf[64];
5906         int r;
5907
5908         preempt_disable();
5909         arch_spin_lock(&trace_cmdline_lock);
5910         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911         arch_spin_unlock(&trace_cmdline_lock);
5912         preempt_enable();
5913
5914         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5918 {
5919         kfree(s->saved_cmdlines);
5920         kfree(s->map_cmdline_to_pid);
5921         kfree(s);
5922 }
5923
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5925 {
5926         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5927
5928         s = kmalloc(sizeof(*s), GFP_KERNEL);
5929         if (!s)
5930                 return -ENOMEM;
5931
5932         if (allocate_cmdlines_buffer(val, s) < 0) {
5933                 kfree(s);
5934                 return -ENOMEM;
5935         }
5936
5937         preempt_disable();
5938         arch_spin_lock(&trace_cmdline_lock);
5939         savedcmd_temp = savedcmd;
5940         savedcmd = s;
5941         arch_spin_unlock(&trace_cmdline_lock);
5942         preempt_enable();
5943         free_saved_cmdlines_buffer(savedcmd_temp);
5944
5945         return 0;
5946 }
5947
5948 static ssize_t
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950                                   size_t cnt, loff_t *ppos)
5951 {
5952         unsigned long val;
5953         int ret;
5954
5955         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5956         if (ret)
5957                 return ret;
5958
5959         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5960         if (!val || val > PID_MAX_DEFAULT)
5961                 return -EINVAL;
5962
5963         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5964         if (ret < 0)
5965                 return ret;
5966
5967         *ppos += cnt;
5968
5969         return cnt;
5970 }
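/*
 * Editor's note (usage illustration): the number of cached comm<->pid
 * entries can be raised at run time by writing to saved_cmdlines_size,
 * e.g. to remember up to 4096 commands:
 *
 *   # echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL above.
 */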
5971
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973         .open           = tracing_open_generic,
5974         .read           = tracing_saved_cmdlines_size_read,
5975         .write          = tracing_saved_cmdlines_size_write,
5976 };
5977
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5981 {
5982         if (!ptr->map.eval_string) {
5983                 if (ptr->tail.next) {
5984                         ptr = ptr->tail.next;
5985                         /* Set ptr to the next real item (skip head) */
5986                         ptr++;
5987                 } else
5988                         return NULL;
5989         }
5990         return ptr;
5991 }
5992
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5994 {
5995         union trace_eval_map_item *ptr = v;
5996
5997         /*
5998          * Paranoid! If ptr points to end, we don't want to increment past it.
5999          * This really should never happen.
6000          */
6001         (*pos)++;
6002         ptr = update_eval_map(ptr);
6003         if (WARN_ON_ONCE(!ptr))
6004                 return NULL;
6005
6006         ptr++;
6007         ptr = update_eval_map(ptr);
6008
6009         return ptr;
6010 }
6011
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6013 {
6014         union trace_eval_map_item *v;
6015         loff_t l = 0;
6016
6017         mutex_lock(&trace_eval_mutex);
6018
6019         v = trace_eval_maps;
6020         if (v)
6021                 v++;
6022
6023         while (v && l < *pos) {
6024                 v = eval_map_next(m, v, &l);
6025         }
6026
6027         return v;
6028 }
6029
6030 static void eval_map_stop(struct seq_file *m, void *v)
6031 {
6032         mutex_unlock(&trace_eval_mutex);
6033 }
6034
6035 static int eval_map_show(struct seq_file *m, void *v)
6036 {
6037         union trace_eval_map_item *ptr = v;
6038
6039         seq_printf(m, "%s %ld (%s)\n",
6040                    ptr->map.eval_string, ptr->map.eval_value,
6041                    ptr->map.system);
6042
6043         return 0;
6044 }
6045
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047         .start          = eval_map_start,
6048         .next           = eval_map_next,
6049         .stop           = eval_map_stop,
6050         .show           = eval_map_show,
6051 };
6052
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6054 {
6055         int ret;
6056
6057         ret = tracing_check_open_get_tr(NULL);
6058         if (ret)
6059                 return ret;
6060
6061         return seq_open(filp, &tracing_eval_map_seq_ops);
6062 }
6063
6064 static const struct file_operations tracing_eval_map_fops = {
6065         .open           = tracing_eval_map_open,
6066         .read           = seq_read,
6067         .llseek         = seq_lseek,
6068         .release        = seq_release,
6069 };
6070
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6073 {
6074         /* Return tail of array given the head */
6075         return ptr + ptr->head.length + 1;
6076 }
6077
6078 static void
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6080                            int len)
6081 {
6082         struct trace_eval_map **stop;
6083         struct trace_eval_map **map;
6084         union trace_eval_map_item *map_array;
6085         union trace_eval_map_item *ptr;
6086
6087         stop = start + len;
6088
6089         /*
6090          * The trace_eval_maps array contains the maps plus a head and a tail
6091          * item, where the head holds the module and the length of the array,
6092          * and the tail holds a pointer to the next list.
6093          */
6094         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6095         if (!map_array) {
6096                 pr_warn("Unable to allocate trace eval mapping\n");
6097                 return;
6098         }
6099
6100         mutex_lock(&trace_eval_mutex);
6101
6102         if (!trace_eval_maps)
6103                 trace_eval_maps = map_array;
6104         else {
6105                 ptr = trace_eval_maps;
6106                 for (;;) {
6107                         ptr = trace_eval_jmp_to_tail(ptr);
6108                         if (!ptr->tail.next)
6109                                 break;
6110                         ptr = ptr->tail.next;
6111
6112                 }
6113                 ptr->tail.next = map_array;
6114         }
6115         map_array->head.mod = mod;
6116         map_array->head.length = len;
6117         map_array++;
6118
6119         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120                 map_array->map = **map;
6121                 map_array++;
6122         }
6123         memset(map_array, 0, sizeof(*map_array));
6124
6125         mutex_unlock(&trace_eval_mutex);
6126 }
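/*
 * Editor's note: the array built above for a module contributing N maps is
 * laid out as:
 *
 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() lands on the tail by skipping the head plus the
 * N map entries, and the final memset() leaves tail.next NULL until another
 * module chains its own array onto it.
 */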
6127
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6129 {
6130         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131                           NULL, &tracing_eval_map_fops);
6132 }
6133
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6135 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6136 static inline void trace_insert_eval_map_file(struct module *mod,
6137                               struct trace_eval_map **start, int len) { }
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6139
6140 static void trace_insert_eval_map(struct module *mod,
6141                                   struct trace_eval_map **start, int len)
6142 {
6143         struct trace_eval_map **map;
6144
6145         if (len <= 0)
6146                 return;
6147
6148         map = start;
6149
6150         trace_event_eval_update(map, len);
6151
6152         trace_insert_eval_map_file(mod, start, len);
6153 }
6154
6155 static ssize_t
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157                        size_t cnt, loff_t *ppos)
6158 {
6159         struct trace_array *tr = filp->private_data;
6160         char buf[MAX_TRACER_SIZE+2];
6161         int r;
6162
6163         mutex_lock(&trace_types_lock);
6164         r = sprintf(buf, "%s\n", tr->current_trace->name);
6165         mutex_unlock(&trace_types_lock);
6166
6167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6168 }
6169
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6171 {
6172         tracing_reset_online_cpus(&tr->array_buffer);
6173         return t->init(tr);
6174 }
6175
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6177 {
6178         int cpu;
6179
6180         for_each_tracing_cpu(cpu)
6181                 per_cpu_ptr(buf->data, cpu)->entries = val;
6182 }
6183
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @trace_buf to the size of @size_buf's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187                                         struct array_buffer *size_buf, int cpu_id)
6188 {
6189         int cpu, ret = 0;
6190
6191         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192                 for_each_tracing_cpu(cpu) {
6193                         ret = ring_buffer_resize(trace_buf->buffer,
6194                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6195                         if (ret < 0)
6196                                 break;
6197                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6198                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6199                 }
6200         } else {
6201                 ret = ring_buffer_resize(trace_buf->buffer,
6202                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6203                 if (ret == 0)
6204                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6206         }
6207
6208         return ret;
6209 }
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6211
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213                                         unsigned long size, int cpu)
6214 {
6215         int ret;
6216
6217         /*
6218          * If kernel or user changes the size of the ring buffer
6219          * we use the size that was given, and we can forget about
6220          * expanding it later.
6221          */
6222         ring_buffer_expanded = true;
6223
6224         /* May be called before buffers are initialized */
6225         if (!tr->array_buffer.buffer)
6226                 return 0;
6227
6228         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6229         if (ret < 0)
6230                 return ret;
6231
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234             !tr->current_trace->use_max_tr)
6235                 goto out;
6236
6237         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6238         if (ret < 0) {
6239                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240                                                      &tr->array_buffer, cpu);
6241                 if (r < 0) {
6242                         /*
6243                          * AARGH! We are left with different
6244                          * size max buffer!!!!
6245                          * The max buffer is our "snapshot" buffer.
6246                          * When a tracer needs a snapshot (one of the
6247                          * latency tracers), it swaps the max buffer
6248                          * with the saved snapshot. We succeeded in
6249                          * updating the size of the main buffer, but failed to
6250                          * update the size of the max buffer. But when we tried
6251                          * to reset the main buffer to the original size, we
6252                          * failed there too. This is very unlikely to
6253                          * happen, but if it does, warn and kill all
6254                          * tracing.
6255                          */
6256                         WARN_ON(1);
6257                         tracing_disabled = 1;
6258                 }
6259                 return ret;
6260         }
6261
6262         if (cpu == RING_BUFFER_ALL_CPUS)
6263                 set_buffer_entries(&tr->max_buffer, size);
6264         else
6265                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6266
6267  out:
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6269
6270         if (cpu == RING_BUFFER_ALL_CPUS)
6271                 set_buffer_entries(&tr->array_buffer, size);
6272         else
6273                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6274
6275         return ret;
6276 }
6277
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279                                   unsigned long size, int cpu_id)
6280 {
6281         int ret;
6282
6283         mutex_lock(&trace_types_lock);
6284
6285         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286                 /* make sure this cpu is enabled in the mask */
6287                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6288                         ret = -EINVAL;
6289                         goto out;
6290                 }
6291         }
6292
6293         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6294         if (ret < 0)
6295                 ret = -ENOMEM;
6296
6297 out:
6298         mutex_unlock(&trace_types_lock);
6299
6300         return ret;
6301 }
6302
6303
6304 /**
6305  * tracing_update_buffers - used by tracing facility to expand ring buffers
6306  *
6307  * To save memory when tracing is never used on a system that has it
6308  * configured in, the ring buffers are set to a minimum size.  But once
6309  * a user starts to use the tracing facility, they need to grow
6310  * to their default size.
6311  *
6312  * This function is to be called when a tracer is about to be used.
6313  */
6314 int tracing_update_buffers(void)
6315 {
6316         int ret = 0;
6317
6318         mutex_lock(&trace_types_lock);
6319         if (!ring_buffer_expanded)
6320                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321                                                 RING_BUFFER_ALL_CPUS);
6322         mutex_unlock(&trace_types_lock);
6323
6324         return ret;
6325 }
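
/*
 * Minimal usage sketch (illustrative, not taken from a specific caller):
 * a facility that is about to start recording events would typically do
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * so that the first real event does not land in the minimal boot-time
 * sized ring buffer.
 */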
6326
6327 struct trace_option_dentry;
6328
6329 static void
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6331
6332 /*
6333  * Used to clear out the tracer before deletion of an instance.
6334  * Must have trace_types_lock held.
6335  */
6336 static void tracing_set_nop(struct trace_array *tr)
6337 {
6338         if (tr->current_trace == &nop_trace)
6339                 return;
6340
6341         tr->current_trace->enabled--;
6342
6343         if (tr->current_trace->reset)
6344                 tr->current_trace->reset(tr);
6345
6346         tr->current_trace = &nop_trace;
6347 }
6348
6349 static bool tracer_options_updated;
6350
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6352 {
6353         /* Only enable if the directory has been created already. */
6354         if (!tr->dir)
6355                 return;
6356
6357         /* Only create trace option files after update_tracer_options finishes */
6358         if (!tracer_options_updated)
6359                 return;
6360
6361         create_trace_option_files(tr, t);
6362 }
6363
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6365 {
6366         struct tracer *t;
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368         bool had_max_tr;
6369 #endif
6370         int ret = 0;
6371
6372         mutex_lock(&trace_types_lock);
6373
6374         if (!ring_buffer_expanded) {
6375                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376                                                 RING_BUFFER_ALL_CPUS);
6377                 if (ret < 0)
6378                         goto out;
6379                 ret = 0;
6380         }
6381
6382         for (t = trace_types; t; t = t->next) {
6383                 if (strcmp(t->name, buf) == 0)
6384                         break;
6385         }
6386         if (!t) {
6387                 ret = -EINVAL;
6388                 goto out;
6389         }
6390         if (t == tr->current_trace)
6391                 goto out;
6392
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394         if (t->use_max_tr) {
6395                 local_irq_disable();
6396                 arch_spin_lock(&tr->max_lock);
6397                 if (tr->cond_snapshot)
6398                         ret = -EBUSY;
6399                 arch_spin_unlock(&tr->max_lock);
6400                 local_irq_enable();
6401                 if (ret)
6402                         goto out;
6403         }
6404 #endif
6405         /* Some tracers won't work on kernel command line */
6406         if (system_state < SYSTEM_RUNNING && t->noboot) {
6407                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6408                         t->name);
6409                 goto out;
6410         }
6411
6412         /* Some tracers are only allowed for the top level buffer */
6413         if (!trace_ok_for_array(t, tr)) {
6414                 ret = -EINVAL;
6415                 goto out;
6416         }
6417
6418         /* If trace pipe files are being read, we can't change the tracer */
6419         if (tr->trace_ref) {
6420                 ret = -EBUSY;
6421                 goto out;
6422         }
6423
6424         trace_branch_disable();
6425
6426         tr->current_trace->enabled--;
6427
6428         if (tr->current_trace->reset)
6429                 tr->current_trace->reset(tr);
6430
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432         had_max_tr = tr->current_trace->use_max_tr;
6433
6434         /* Current trace needs to be nop_trace before synchronize_rcu */
6435         tr->current_trace = &nop_trace;
6436
6437         if (had_max_tr && !t->use_max_tr) {
6438                 /*
6439                  * We need to make sure that the update_max_tr sees that
6440                  * current_trace changed to nop_trace to keep it from
6441                  * swapping the buffers after we resize it.
6442                  * The update_max_tr is called with interrupts disabled,
6443                  * so a synchronize_rcu() is sufficient.
6444                  */
6445                 synchronize_rcu();
6446                 free_snapshot(tr);
6447         }
6448
6449         if (t->use_max_tr && !tr->allocated_snapshot) {
6450                 ret = tracing_alloc_snapshot_instance(tr);
6451                 if (ret < 0)
6452                         goto out;
6453         }
6454 #else
6455         tr->current_trace = &nop_trace;
6456 #endif
6457
6458         if (t->init) {
6459                 ret = tracer_init(t, tr);
6460                 if (ret)
6461                         goto out;
6462         }
6463
6464         tr->current_trace = t;
6465         tr->current_trace->enabled++;
6466         trace_branch_enable(tr);
6467  out:
6468         mutex_unlock(&trace_types_lock);
6469
6470         return ret;
6471 }
6472
6473 static ssize_t
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475                         size_t cnt, loff_t *ppos)
6476 {
6477         struct trace_array *tr = filp->private_data;
6478         char buf[MAX_TRACER_SIZE+1];
6479         char *name;
6480         size_t ret;
6481         int err;
6482
6483         ret = cnt;
6484
6485         if (cnt > MAX_TRACER_SIZE)
6486                 cnt = MAX_TRACER_SIZE;
6487
6488         if (copy_from_user(buf, ubuf, cnt))
6489                 return -EFAULT;
6490
6491         buf[cnt] = 0;
6492
6493         name = strim(buf);
6494
6495         err = tracing_set_tracer(tr, name);
6496         if (err)
6497                 return err;
6498
6499         *ppos += ret;
6500
6501         return ret;
6502 }
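
/*
 * Example of the write path above (illustrative): this handler backs the
 * current_tracer tracefs file (created elsewhere in this file), so with
 * tracefs mounted at /sys/kernel/tracing,
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *
 * arrives here with buf = "function\n"; strim() drops the newline before
 * tracing_set_tracer() looks the name up in the trace_types list.
 */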
6503
6504 static ssize_t
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506                    size_t cnt, loff_t *ppos)
6507 {
6508         char buf[64];
6509         int r;
6510
6511         r = snprintf(buf, sizeof(buf), "%ld\n",
6512                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513         if (r > sizeof(buf))
6514                 r = sizeof(buf);
6515         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6516 }
6517
6518 static ssize_t
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520                     size_t cnt, loff_t *ppos)
6521 {
6522         unsigned long val;
6523         int ret;
6524
6525         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6526         if (ret)
6527                 return ret;
6528
6529         *ptr = val * 1000;
6530
6531         return cnt;
6532 }
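
/*
 * Units worked example (follows directly from the two helpers above):
 * the value stored in *ptr is in nanoseconds while the file interface
 * is in microseconds.  Writing "100" stores 100 * 1000 = 100000 ns, and
 * reading it back goes through nsecs_to_usecs() and prints "100".  The
 * special value (unsigned long)-1 is reported as "-1".
 */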
6533
6534 static ssize_t
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536                     size_t cnt, loff_t *ppos)
6537 {
6538         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6539 }
6540
6541 static ssize_t
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543                      size_t cnt, loff_t *ppos)
6544 {
6545         struct trace_array *tr = filp->private_data;
6546         int ret;
6547
6548         mutex_lock(&trace_types_lock);
6549         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6550         if (ret < 0)
6551                 goto out;
6552
6553         if (tr->current_trace->update_thresh) {
6554                 ret = tr->current_trace->update_thresh(tr);
6555                 if (ret < 0)
6556                         goto out;
6557         }
6558
6559         ret = cnt;
6560 out:
6561         mutex_unlock(&trace_types_lock);
6562
6563         return ret;
6564 }
6565
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6567
6568 static ssize_t
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570                      size_t cnt, loff_t *ppos)
6571 {
6572         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6573 }
6574
6575 static ssize_t
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577                       size_t cnt, loff_t *ppos)
6578 {
6579         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6580 }
6581
6582 #endif
6583
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6585 {
6586         struct trace_array *tr = inode->i_private;
6587         struct trace_iterator *iter;
6588         int ret;
6589
6590         ret = tracing_check_open_get_tr(tr);
6591         if (ret)
6592                 return ret;
6593
6594         mutex_lock(&trace_types_lock);
6595
6596         /* create a buffer to store the information to pass to userspace */
6597         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6598         if (!iter) {
6599                 ret = -ENOMEM;
6600                 __trace_array_put(tr);
6601                 goto out;
6602         }
6603
6604         trace_seq_init(&iter->seq);
6605         iter->trace = tr->current_trace;
6606
6607         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6608                 ret = -ENOMEM;
6609                 goto fail;
6610         }
6611
6612         /* trace pipe does not show start of buffer */
6613         cpumask_setall(iter->started);
6614
6615         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6617
6618         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619         if (trace_clocks[tr->clock_id].in_ns)
6620                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6621
6622         iter->tr = tr;
6623         iter->array_buffer = &tr->array_buffer;
6624         iter->cpu_file = tracing_get_cpu(inode);
6625         mutex_init(&iter->mutex);
6626         filp->private_data = iter;
6627
6628         if (iter->trace->pipe_open)
6629                 iter->trace->pipe_open(iter);
6630
6631         nonseekable_open(inode, filp);
6632
6633         tr->trace_ref++;
6634 out:
6635         mutex_unlock(&trace_types_lock);
6636         return ret;
6637
6638 fail:
6639         kfree(iter);
6640         __trace_array_put(tr);
6641         mutex_unlock(&trace_types_lock);
6642         return ret;
6643 }
6644
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6646 {
6647         struct trace_iterator *iter = file->private_data;
6648         struct trace_array *tr = inode->i_private;
6649
6650         mutex_lock(&trace_types_lock);
6651
6652         tr->trace_ref--;
6653
6654         if (iter->trace->pipe_close)
6655                 iter->trace->pipe_close(iter);
6656
6657         mutex_unlock(&trace_types_lock);
6658
6659         free_cpumask_var(iter->started);
6660         mutex_destroy(&iter->mutex);
6661         kfree(iter);
6662
6663         trace_array_put(tr);
6664
6665         return 0;
6666 }
6667
6668 static __poll_t
6669 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6670 {
6671         struct trace_array *tr = iter->tr;
6672
6673         /* Iterators are static, they should be filled or empty */
6674         if (trace_buffer_iter(iter, iter->cpu_file))
6675                 return EPOLLIN | EPOLLRDNORM;
6676
6677         if (tr->trace_flags & TRACE_ITER_BLOCK)
6678                 /*
6679                  * Always select as readable when in blocking mode
6680                  */
6681                 return EPOLLIN | EPOLLRDNORM;
6682         else
6683                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6684                                              filp, poll_table);
6685 }
6686
6687 static __poll_t
6688 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6689 {
6690         struct trace_iterator *iter = filp->private_data;
6691
6692         return trace_poll(iter, filp, poll_table);
6693 }
6694
6695 /* Must be called with iter->mutex held. */
6696 static int tracing_wait_pipe(struct file *filp)
6697 {
6698         struct trace_iterator *iter = filp->private_data;
6699         int ret;
6700
6701         while (trace_empty(iter)) {
6702
6703                 if ((filp->f_flags & O_NONBLOCK)) {
6704                         return -EAGAIN;
6705                 }
6706
6707                 /*
6708                  * We block until there is something to read, or until tracing
6709                  * is disabled after we have already read something. We still
6710                  * block if tracing is off but nothing has been read yet, which
6711                  * allows a user to cat this file and then enable tracing. After
6712                  * we have read something, we give an EOF once tracing is disabled.
6713                  *
6714                  * iter->pos will be 0 if we haven't read anything.
6715                  */
6716                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6717                         break;
6718
6719                 mutex_unlock(&iter->mutex);
6720
6721                 ret = wait_on_pipe(iter, 0);
6722
6723                 mutex_lock(&iter->mutex);
6724
6725                 if (ret)
6726                         return ret;
6727         }
6728
6729         return 1;
6730 }
6731
6732 /*
6733  * Consumer reader.
6734  */
6735 static ssize_t
6736 tracing_read_pipe(struct file *filp, char __user *ubuf,
6737                   size_t cnt, loff_t *ppos)
6738 {
6739         struct trace_iterator *iter = filp->private_data;
6740         ssize_t sret;
6741
6742         /*
6743          * Avoid more than one consumer on a single file descriptor.
6744          * This is just a matter of trace coherency; the ring buffer itself
6745          * is protected.
6746          */
6747         mutex_lock(&iter->mutex);
6748
6749         /* return any leftover data */
6750         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6751         if (sret != -EBUSY)
6752                 goto out;
6753
6754         trace_seq_init(&iter->seq);
6755
6756         if (iter->trace->read) {
6757                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6758                 if (sret)
6759                         goto out;
6760         }
6761
6762 waitagain:
6763         sret = tracing_wait_pipe(filp);
6764         if (sret <= 0)
6765                 goto out;
6766
6767         /* stop when tracing is finished */
6768         if (trace_empty(iter)) {
6769                 sret = 0;
6770                 goto out;
6771         }
6772
6773         if (cnt >= PAGE_SIZE)
6774                 cnt = PAGE_SIZE - 1;
6775
6776         /* reset all but tr, trace, and overruns */
6777         trace_iterator_reset(iter);
6778         cpumask_clear(iter->started);
6779         trace_seq_init(&iter->seq);
6780
6781         trace_event_read_lock();
6782         trace_access_lock(iter->cpu_file);
6783         while (trace_find_next_entry_inc(iter) != NULL) {
6784                 enum print_line_t ret;
6785                 int save_len = iter->seq.seq.len;
6786
6787                 ret = print_trace_line(iter);
6788                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789                         /* don't print partial lines */
6790                         iter->seq.seq.len = save_len;
6791                         break;
6792                 }
6793                 if (ret != TRACE_TYPE_NO_CONSUME)
6794                         trace_consume(iter);
6795
6796                 if (trace_seq_used(&iter->seq) >= cnt)
6797                         break;
6798
6799                 /*
6800                  * Setting the full flag means we reached the trace_seq buffer
6801                  * size and we should have left via the partial-line condition above.
6802                  * One of the trace_seq_* functions is not being used properly.
6803                  */
6804                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6805                           iter->ent->type);
6806         }
6807         trace_access_unlock(iter->cpu_file);
6808         trace_event_read_unlock();
6809
6810         /* Now copy what we have to the user */
6811         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6812         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6813                 trace_seq_init(&iter->seq);
6814
6815         /*
6816          * If there was nothing to send to the user, despite having consumed
6817          * trace entries, go back and wait for more entries.
6818          */
6819         if (sret == -EBUSY)
6820                 goto waitagain;
6821
6822 out:
6823         mutex_unlock(&iter->mutex);
6824
6825         return sret;
6826 }
6827
6828 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6829                                      unsigned int idx)
6830 {
6831         __free_page(spd->pages[idx]);
6832 }
6833
6834 static size_t
6835 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6836 {
6837         size_t count;
6838         int save_len;
6839         int ret;
6840
6841         /* Seq buffer is page-sized, exactly what we need. */
6842         for (;;) {
6843                 save_len = iter->seq.seq.len;
6844                 ret = print_trace_line(iter);
6845
6846                 if (trace_seq_has_overflowed(&iter->seq)) {
6847                         iter->seq.seq.len = save_len;
6848                         break;
6849                 }
6850
6851                 /*
6852                  * This should not be hit, because it should only
6853                  * be set if the iter->seq overflowed. But check it
6854                  * anyway to be safe.
6855                  */
6856                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6857                         iter->seq.seq.len = save_len;
6858                         break;
6859                 }
6860
6861                 count = trace_seq_used(&iter->seq) - save_len;
6862                 if (rem < count) {
6863                         rem = 0;
6864                         iter->seq.seq.len = save_len;
6865                         break;
6866                 }
6867
6868                 if (ret != TRACE_TYPE_NO_CONSUME)
6869                         trace_consume(iter);
6870                 rem -= count;
6871                 if (!trace_find_next_entry_inc(iter))   {
6872                         rem = 0;
6873                         iter->ent = NULL;
6874                         break;
6875                 }
6876         }
6877
6878         return rem;
6879 }
6880
6881 static ssize_t tracing_splice_read_pipe(struct file *filp,
6882                                         loff_t *ppos,
6883                                         struct pipe_inode_info *pipe,
6884                                         size_t len,
6885                                         unsigned int flags)
6886 {
6887         struct page *pages_def[PIPE_DEF_BUFFERS];
6888         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6889         struct trace_iterator *iter = filp->private_data;
6890         struct splice_pipe_desc spd = {
6891                 .pages          = pages_def,
6892                 .partial        = partial_def,
6893                 .nr_pages       = 0, /* This gets updated below. */
6894                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6895                 .ops            = &default_pipe_buf_ops,
6896                 .spd_release    = tracing_spd_release_pipe,
6897         };
6898         ssize_t ret;
6899         size_t rem;
6900         unsigned int i;
6901
6902         if (splice_grow_spd(pipe, &spd))
6903                 return -ENOMEM;
6904
6905         mutex_lock(&iter->mutex);
6906
6907         if (iter->trace->splice_read) {
6908                 ret = iter->trace->splice_read(iter, filp,
6909                                                ppos, pipe, len, flags);
6910                 if (ret)
6911                         goto out_err;
6912         }
6913
6914         ret = tracing_wait_pipe(filp);
6915         if (ret <= 0)
6916                 goto out_err;
6917
6918         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6919                 ret = -EFAULT;
6920                 goto out_err;
6921         }
6922
6923         trace_event_read_lock();
6924         trace_access_lock(iter->cpu_file);
6925
6926         /* Fill as many pages as possible. */
6927         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6928                 spd.pages[i] = alloc_page(GFP_KERNEL);
6929                 if (!spd.pages[i])
6930                         break;
6931
6932                 rem = tracing_fill_pipe_page(rem, iter);
6933
6934                 /* Copy the data into the page, so we can start over. */
6935                 ret = trace_seq_to_buffer(&iter->seq,
6936                                           page_address(spd.pages[i]),
6937                                           trace_seq_used(&iter->seq));
6938                 if (ret < 0) {
6939                         __free_page(spd.pages[i]);
6940                         break;
6941                 }
6942                 spd.partial[i].offset = 0;
6943                 spd.partial[i].len = trace_seq_used(&iter->seq);
6944
6945                 trace_seq_init(&iter->seq);
6946         }
6947
6948         trace_access_unlock(iter->cpu_file);
6949         trace_event_read_unlock();
6950         mutex_unlock(&iter->mutex);
6951
6952         spd.nr_pages = i;
6953
6954         if (i)
6955                 ret = splice_to_pipe(pipe, &spd);
6956         else
6957                 ret = 0;
6958 out:
6959         splice_shrink_spd(&spd);
6960         return ret;
6961
6962 out_err:
6963         mutex_unlock(&iter->mutex);
6964         goto out;
6965 }
6966
6967 static ssize_t
6968 tracing_entries_read(struct file *filp, char __user *ubuf,
6969                      size_t cnt, loff_t *ppos)
6970 {
6971         struct inode *inode = file_inode(filp);
6972         struct trace_array *tr = inode->i_private;
6973         int cpu = tracing_get_cpu(inode);
6974         char buf[64];
6975         int r = 0;
6976         ssize_t ret;
6977
6978         mutex_lock(&trace_types_lock);
6979
6980         if (cpu == RING_BUFFER_ALL_CPUS) {
6981                 int cpu, buf_size_same;
6982                 unsigned long size;
6983
6984                 size = 0;
6985                 buf_size_same = 1;
6986                 /* check if all cpu sizes are same */
6987                 for_each_tracing_cpu(cpu) {
6988                         /* fill in the size from first enabled cpu */
6989                         if (size == 0)
6990                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6991                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6992                                 buf_size_same = 0;
6993                                 break;
6994                         }
6995                 }
6996
6997                 if (buf_size_same) {
6998                         if (!ring_buffer_expanded)
6999                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7000                                             size >> 10,
7001                                             trace_buf_size >> 10);
7002                         else
7003                                 r = sprintf(buf, "%lu\n", size >> 10);
7004                 } else
7005                         r = sprintf(buf, "X\n");
7006         } else
7007                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7008
7009         mutex_unlock(&trace_types_lock);
7010
7011         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012         return ret;
7013 }
7014
7015 static ssize_t
7016 tracing_entries_write(struct file *filp, const char __user *ubuf,
7017                       size_t cnt, loff_t *ppos)
7018 {
7019         struct inode *inode = file_inode(filp);
7020         struct trace_array *tr = inode->i_private;
7021         unsigned long val;
7022         int ret;
7023
7024         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7025         if (ret)
7026                 return ret;
7027
7028         /* must have at least 1 entry */
7029         if (!val)
7030                 return -EINVAL;
7031
7032         /* value is in KB */
7033         val <<= 10;
7034         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7035         if (ret < 0)
7036                 return ret;
7037
7038         *ppos += cnt;
7039
7040         return cnt;
7041 }
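
/*
 * Worked example for the KB conversion above (illustrative): writing
 * "1408" to this file yields val = 1408, and val <<= 10 turns that into
 * 1441792 bytes, which tracing_resize_ring_buffer() applies to every
 * CPU buffer (or to the single CPU this file instance refers to).
 * Reads report the per-CPU entries shifted right by 10, i.e. in KB again.
 */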
7042
7043 static ssize_t
7044 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7045                                 size_t cnt, loff_t *ppos)
7046 {
7047         struct trace_array *tr = filp->private_data;
7048         char buf[64];
7049         int r, cpu;
7050         unsigned long size = 0, expanded_size = 0;
7051
7052         mutex_lock(&trace_types_lock);
7053         for_each_tracing_cpu(cpu) {
7054                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7055                 if (!ring_buffer_expanded)
7056                         expanded_size += trace_buf_size >> 10;
7057         }
7058         if (ring_buffer_expanded)
7059                 r = sprintf(buf, "%lu\n", size);
7060         else
7061                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7062         mutex_unlock(&trace_types_lock);
7063
7064         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7065 }
7066
7067 static ssize_t
7068 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7069                           size_t cnt, loff_t *ppos)
7070 {
7071         /*
7072          * There is no need to read what the user has written; this function
7073          * exists only so that writing with "echo" does not return an error.
7074          */
7075
7076         *ppos += cnt;
7077
7078         return cnt;
7079 }
7080
7081 static int
7082 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7083 {
7084         struct trace_array *tr = inode->i_private;
7085
7086         /* disable tracing ? */
7087         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7088                 tracer_tracing_off(tr);
7089         /* resize the ring buffer to 0 */
7090         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7091
7092         trace_array_put(tr);
7093
7094         return 0;
7095 }
7096
7097 static ssize_t
7098 tracing_mark_write(struct file *filp, const char __user *ubuf,
7099                                         size_t cnt, loff_t *fpos)
7100 {
7101         struct trace_array *tr = filp->private_data;
7102         struct ring_buffer_event *event;
7103         enum event_trigger_type tt = ETT_NONE;
7104         struct trace_buffer *buffer;
7105         struct print_entry *entry;
7106         ssize_t written;
7107         int size;
7108         int len;
7109
7110 /* Used in tracing_mark_raw_write() as well */
7111 #define FAULTED_STR "<faulted>"
7112 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7113
7114         if (tracing_disabled)
7115                 return -EINVAL;
7116
7117         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7118                 return -EINVAL;
7119
7120         if (cnt > TRACE_BUF_SIZE)
7121                 cnt = TRACE_BUF_SIZE;
7122
7123         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7124
7125         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7126
7127         /* If less than "<faulted>", then make sure we can still add that */
7128         if (cnt < FAULTED_SIZE)
7129                 size += FAULTED_SIZE - cnt;
7130
7131         buffer = tr->array_buffer.buffer;
7132         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7133                                             tracing_gen_ctx());
7134         if (unlikely(!event))
7135                 /* Ring buffer disabled, return as if not open for write */
7136                 return -EBADF;
7137
7138         entry = ring_buffer_event_data(event);
7139         entry->ip = _THIS_IP_;
7140
7141         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7142         if (len) {
7143                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7144                 cnt = FAULTED_SIZE;
7145                 written = -EFAULT;
7146         } else
7147                 written = cnt;
7148
7149         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7150                 /* do not add \n before testing triggers, but add \0 */
7151                 entry->buf[cnt] = '\0';
7152                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7153         }
7154
7155         if (entry->buf[cnt - 1] != '\n') {
7156                 entry->buf[cnt] = '\n';
7157                 entry->buf[cnt + 1] = '\0';
7158         } else
7159                 entry->buf[cnt] = '\0';
7160
7161         if (static_branch_unlikely(&trace_marker_exports_enabled))
7162                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7163         __buffer_unlock_commit(buffer, event);
7164
7165         if (tt)
7166                 event_triggers_post_call(tr->trace_marker_file, tt);
7167
7168         return written;
7169 }
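
/*
 * Size arithmetic example for the reservation above (illustrative): for
 * a 3-byte write such as "hi\n", size starts at sizeof(*entry) + 3 + 2.
 * Since cnt (3) < FAULTED_SIZE (9), size grows by another 9 - 3 = 6
 * bytes, leaving room to substitute the "<faulted>" string plus the
 * trailing "\n\0" should __copy_from_user_inatomic() fail.
 */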
7170
7171 /* Limit it for now to 3K (including tag) */
7172 #define RAW_DATA_MAX_SIZE (1024*3)
7173
7174 static ssize_t
7175 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7176                                         size_t cnt, loff_t *fpos)
7177 {
7178         struct trace_array *tr = filp->private_data;
7179         struct ring_buffer_event *event;
7180         struct trace_buffer *buffer;
7181         struct raw_data_entry *entry;
7182         ssize_t written;
7183         int size;
7184         int len;
7185
7186 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7187
7188         if (tracing_disabled)
7189                 return -EINVAL;
7190
7191         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7192                 return -EINVAL;
7193
7194         /* The marker must at least have a tag id */
7195         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7196                 return -EINVAL;
7197
7198         if (cnt > TRACE_BUF_SIZE)
7199                 cnt = TRACE_BUF_SIZE;
7200
7201         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7202
7203         size = sizeof(*entry) + cnt;
7204         if (cnt < FAULT_SIZE_ID)
7205                 size += FAULT_SIZE_ID - cnt;
7206
7207         buffer = tr->array_buffer.buffer;
7208         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7209                                             tracing_gen_ctx());
7210         if (!event)
7211                 /* Ring buffer disabled, return as if not open for write */
7212                 return -EBADF;
7213
7214         entry = ring_buffer_event_data(event);
7215
7216         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7217         if (len) {
7218                 entry->id = -1;
7219                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7220                 written = -EFAULT;
7221         } else
7222                 written = cnt;
7223
7224         __buffer_unlock_commit(buffer, event);
7225
7226         return written;
7227 }
7228
7229 static int tracing_clock_show(struct seq_file *m, void *v)
7230 {
7231         struct trace_array *tr = m->private;
7232         int i;
7233
7234         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7235                 seq_printf(m,
7236                         "%s%s%s%s", i ? " " : "",
7237                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7238                         i == tr->clock_id ? "]" : "");
7239         seq_putc(m, '\n');
7240
7241         return 0;
7242 }
7243
7244 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7245 {
7246         int i;
7247
7248         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7249                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7250                         break;
7251         }
7252         if (i == ARRAY_SIZE(trace_clocks))
7253                 return -EINVAL;
7254
7255         mutex_lock(&trace_types_lock);
7256
7257         tr->clock_id = i;
7258
7259         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7260
7261         /*
7262          * New clock may not be consistent with the previous clock.
7263          * Reset the buffer so that it doesn't have incomparable timestamps.
7264          */
7265         tracing_reset_online_cpus(&tr->array_buffer);
7266
7267 #ifdef CONFIG_TRACER_MAX_TRACE
7268         if (tr->max_buffer.buffer)
7269                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7270         tracing_reset_online_cpus(&tr->max_buffer);
7271 #endif
7272
7273         mutex_unlock(&trace_types_lock);
7274
7275         return 0;
7276 }
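
/*
 * Example (illustrative): tracing_set_clock(tr, "global") finds the
 * entry named "global" in trace_clocks[], installs its function on the
 * ring buffer(s) and resets them, since timestamps taken with different
 * clocks cannot be compared.  The same path is taken when a clock name
 * is written through tracing_clock_write() below.
 */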
7277
7278 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7279                                    size_t cnt, loff_t *fpos)
7280 {
7281         struct seq_file *m = filp->private_data;
7282         struct trace_array *tr = m->private;
7283         char buf[64];
7284         const char *clockstr;
7285         int ret;
7286
7287         if (cnt >= sizeof(buf))
7288                 return -EINVAL;
7289
7290         if (copy_from_user(buf, ubuf, cnt))
7291                 return -EFAULT;
7292
7293         buf[cnt] = 0;
7294
7295         clockstr = strstrip(buf);
7296
7297         ret = tracing_set_clock(tr, clockstr);
7298         if (ret)
7299                 return ret;
7300
7301         *fpos += cnt;
7302
7303         return cnt;
7304 }
7305
7306 static int tracing_clock_open(struct inode *inode, struct file *file)
7307 {
7308         struct trace_array *tr = inode->i_private;
7309         int ret;
7310
7311         ret = tracing_check_open_get_tr(tr);
7312         if (ret)
7313                 return ret;
7314
7315         ret = single_open(file, tracing_clock_show, inode->i_private);
7316         if (ret < 0)
7317                 trace_array_put(tr);
7318
7319         return ret;
7320 }
7321
7322 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7323 {
7324         struct trace_array *tr = m->private;
7325
7326         mutex_lock(&trace_types_lock);
7327
7328         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7329                 seq_puts(m, "delta [absolute]\n");
7330         else
7331                 seq_puts(m, "[delta] absolute\n");
7332
7333         mutex_unlock(&trace_types_lock);
7334
7335         return 0;
7336 }
7337
7338 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7339 {
7340         struct trace_array *tr = inode->i_private;
7341         int ret;
7342
7343         ret = tracing_check_open_get_tr(tr);
7344         if (ret)
7345                 return ret;
7346
7347         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7348         if (ret < 0)
7349                 trace_array_put(tr);
7350
7351         return ret;
7352 }
7353
7354 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7355 {
7356         if (rbe == this_cpu_read(trace_buffered_event))
7357                 return ring_buffer_time_stamp(buffer);
7358
7359         return ring_buffer_event_time_stamp(buffer, rbe);
7360 }
7361
7362 /*
7363  * Set or disable using the per CPU trace_buffered_event when possible.
7364  */
7365 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7366 {
7367         int ret = 0;
7368
7369         mutex_lock(&trace_types_lock);
7370
7371         if (set && tr->no_filter_buffering_ref++)
7372                 goto out;
7373
7374         if (!set) {
7375                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7376                         ret = -EINVAL;
7377                         goto out;
7378                 }
7379
7380                 --tr->no_filter_buffering_ref;
7381         }
7382  out:
7383         mutex_unlock(&trace_types_lock);
7384
7385         return ret;
7386 }
7387
7388 struct ftrace_buffer_info {
7389         struct trace_iterator   iter;
7390         void                    *spare;
7391         unsigned int            spare_cpu;
7392         unsigned int            read;
7393 };
7394
7395 #ifdef CONFIG_TRACER_SNAPSHOT
7396 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7397 {
7398         struct trace_array *tr = inode->i_private;
7399         struct trace_iterator *iter;
7400         struct seq_file *m;
7401         int ret;
7402
7403         ret = tracing_check_open_get_tr(tr);
7404         if (ret)
7405                 return ret;
7406
7407         if (file->f_mode & FMODE_READ) {
7408                 iter = __tracing_open(inode, file, true);
7409                 if (IS_ERR(iter))
7410                         ret = PTR_ERR(iter);
7411         } else {
7412                 /* Writes still need the seq_file to hold the private data */
7413                 ret = -ENOMEM;
7414                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7415                 if (!m)
7416                         goto out;
7417                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7418                 if (!iter) {
7419                         kfree(m);
7420                         goto out;
7421                 }
7422                 ret = 0;
7423
7424                 iter->tr = tr;
7425                 iter->array_buffer = &tr->max_buffer;
7426                 iter->cpu_file = tracing_get_cpu(inode);
7427                 m->private = iter;
7428                 file->private_data = m;
7429         }
7430 out:
7431         if (ret < 0)
7432                 trace_array_put(tr);
7433
7434         return ret;
7435 }
7436
7437 static ssize_t
7438 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7439                        loff_t *ppos)
7440 {
7441         struct seq_file *m = filp->private_data;
7442         struct trace_iterator *iter = m->private;
7443         struct trace_array *tr = iter->tr;
7444         unsigned long val;
7445         int ret;
7446
7447         ret = tracing_update_buffers();
7448         if (ret < 0)
7449                 return ret;
7450
7451         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7452         if (ret)
7453                 return ret;
7454
7455         mutex_lock(&trace_types_lock);
7456
7457         if (tr->current_trace->use_max_tr) {
7458                 ret = -EBUSY;
7459                 goto out;
7460         }
7461
7462         local_irq_disable();
7463         arch_spin_lock(&tr->max_lock);
7464         if (tr->cond_snapshot)
7465                 ret = -EBUSY;
7466         arch_spin_unlock(&tr->max_lock);
7467         local_irq_enable();
7468         if (ret)
7469                 goto out;
7470
7471         switch (val) {
7472         case 0:
7473                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7474                         ret = -EINVAL;
7475                         break;
7476                 }
7477                 if (tr->allocated_snapshot)
7478                         free_snapshot(tr);
7479                 break;
7480         case 1:
7481 /* Only allow per-cpu swap if the ring buffer supports it */
7482 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7483                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7484                         ret = -EINVAL;
7485                         break;
7486                 }
7487 #endif
7488                 if (tr->allocated_snapshot)
7489                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7490                                         &tr->array_buffer, iter->cpu_file);
7491                 else
7492                         ret = tracing_alloc_snapshot_instance(tr);
7493                 if (ret < 0)
7494                         break;
7495                 local_irq_disable();
7496                 /* Now, we're going to swap */
7497                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7498                         update_max_tr(tr, current, smp_processor_id(), NULL);
7499                 else
7500                         update_max_tr_single(tr, current, iter->cpu_file);
7501                 local_irq_enable();
7502                 break;
7503         default:
7504                 if (tr->allocated_snapshot) {
7505                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7506                                 tracing_reset_online_cpus(&tr->max_buffer);
7507                         else
7508                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7509                 }
7510                 break;
7511         }
7512
7513         if (ret >= 0) {
7514                 *ppos += cnt;
7515                 ret = cnt;
7516         }
7517 out:
7518         mutex_unlock(&trace_types_lock);
7519         return ret;
7520 }
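
/*
 * Cheat sheet for the switch above (derived from the code, illustrative
 * only): writing 0 frees the snapshot buffer (only valid on the
 * all-CPUs file), writing 1 allocates it if necessary and swaps it with
 * the live buffer, and any other value clears the snapshot contents
 * without freeing the buffer.
 */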
7521
7522 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7523 {
7524         struct seq_file *m = file->private_data;
7525         int ret;
7526
7527         ret = tracing_release(inode, file);
7528
7529         if (file->f_mode & FMODE_READ)
7530                 return ret;
7531
7532         /* If write only, the seq_file is just a stub */
7533         if (m)
7534                 kfree(m->private);
7535         kfree(m);
7536
7537         return 0;
7538 }
7539
7540 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7541 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7542                                     size_t count, loff_t *ppos);
7543 static int tracing_buffers_release(struct inode *inode, struct file *file);
7544 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7545                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7546
7547 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7548 {
7549         struct ftrace_buffer_info *info;
7550         int ret;
7551
7552         /* The following checks for tracefs lockdown */
7553         ret = tracing_buffers_open(inode, filp);
7554         if (ret < 0)
7555                 return ret;
7556
7557         info = filp->private_data;
7558
7559         if (info->iter.trace->use_max_tr) {
7560                 tracing_buffers_release(inode, filp);
7561                 return -EBUSY;
7562         }
7563
7564         info->iter.snapshot = true;
7565         info->iter.array_buffer = &info->iter.tr->max_buffer;
7566
7567         return ret;
7568 }
7569
7570 #endif /* CONFIG_TRACER_SNAPSHOT */
7571
7572
7573 static const struct file_operations tracing_thresh_fops = {
7574         .open           = tracing_open_generic,
7575         .read           = tracing_thresh_read,
7576         .write          = tracing_thresh_write,
7577         .llseek         = generic_file_llseek,
7578 };
7579
7580 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7581 static const struct file_operations tracing_max_lat_fops = {
7582         .open           = tracing_open_generic,
7583         .read           = tracing_max_lat_read,
7584         .write          = tracing_max_lat_write,
7585         .llseek         = generic_file_llseek,
7586 };
7587 #endif
7588
7589 static const struct file_operations set_tracer_fops = {
7590         .open           = tracing_open_generic,
7591         .read           = tracing_set_trace_read,
7592         .write          = tracing_set_trace_write,
7593         .llseek         = generic_file_llseek,
7594 };
7595
7596 static const struct file_operations tracing_pipe_fops = {
7597         .open           = tracing_open_pipe,
7598         .poll           = tracing_poll_pipe,
7599         .read           = tracing_read_pipe,
7600         .splice_read    = tracing_splice_read_pipe,
7601         .release        = tracing_release_pipe,
7602         .llseek         = no_llseek,
7603 };
7604
7605 static const struct file_operations tracing_entries_fops = {
7606         .open           = tracing_open_generic_tr,
7607         .read           = tracing_entries_read,
7608         .write          = tracing_entries_write,
7609         .llseek         = generic_file_llseek,
7610         .release        = tracing_release_generic_tr,
7611 };
7612
7613 static const struct file_operations tracing_total_entries_fops = {
7614         .open           = tracing_open_generic_tr,
7615         .read           = tracing_total_entries_read,
7616         .llseek         = generic_file_llseek,
7617         .release        = tracing_release_generic_tr,
7618 };
7619
7620 static const struct file_operations tracing_free_buffer_fops = {
7621         .open           = tracing_open_generic_tr,
7622         .write          = tracing_free_buffer_write,
7623         .release        = tracing_free_buffer_release,
7624 };
7625
7626 static const struct file_operations tracing_mark_fops = {
7627         .open           = tracing_mark_open,
7628         .write          = tracing_mark_write,
7629         .release        = tracing_release_generic_tr,
7630 };
7631
7632 static const struct file_operations tracing_mark_raw_fops = {
7633         .open           = tracing_mark_open,
7634         .write          = tracing_mark_raw_write,
7635         .release        = tracing_release_generic_tr,
7636 };
7637
7638 static const struct file_operations trace_clock_fops = {
7639         .open           = tracing_clock_open,
7640         .read           = seq_read,
7641         .llseek         = seq_lseek,
7642         .release        = tracing_single_release_tr,
7643         .write          = tracing_clock_write,
7644 };
7645
7646 static const struct file_operations trace_time_stamp_mode_fops = {
7647         .open           = tracing_time_stamp_mode_open,
7648         .read           = seq_read,
7649         .llseek         = seq_lseek,
7650         .release        = tracing_single_release_tr,
7651 };
7652
7653 #ifdef CONFIG_TRACER_SNAPSHOT
7654 static const struct file_operations snapshot_fops = {
7655         .open           = tracing_snapshot_open,
7656         .read           = seq_read,
7657         .write          = tracing_snapshot_write,
7658         .llseek         = tracing_lseek,
7659         .release        = tracing_snapshot_release,
7660 };
7661
7662 static const struct file_operations snapshot_raw_fops = {
7663         .open           = snapshot_raw_open,
7664         .read           = tracing_buffers_read,
7665         .release        = tracing_buffers_release,
7666         .splice_read    = tracing_buffers_splice_read,
7667         .llseek         = no_llseek,
7668 };
7669
7670 #endif /* CONFIG_TRACER_SNAPSHOT */
7671
7672 /*
7673  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7674  * @filp: The active open file structure
7675  * @ubuf: The userspace provided buffer holding the value to write
7676  * @cnt: The maximum number of bytes to read
7677  * @ppos: The current "file" position
7678  *
7679  * This function implements the write interface for a struct trace_min_max_param.
7680  * The filp->private_data must point to a trace_min_max_param structure that
7681  * defines where to write the value, the min and the max acceptable values,
7682  * and a lock to protect the write.
7683  */
7684 static ssize_t
7685 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7686 {
7687         struct trace_min_max_param *param = filp->private_data;
7688         u64 val;
7689         int err;
7690
7691         if (!param)
7692                 return -EFAULT;
7693
7694         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7695         if (err)
7696                 return err;
7697
7698         if (param->lock)
7699                 mutex_lock(param->lock);
7700
7701         if (param->min && val < *param->min)
7702                 err = -EINVAL;
7703
7704         if (param->max && val > *param->max)
7705                 err = -EINVAL;
7706
7707         if (!err)
7708                 *param->val = val;
7709
7710         if (param->lock)
7711                 mutex_unlock(param->lock);
7712
7713         if (err)
7714                 return err;
7715
7716         return cnt;
7717 }
7718
7719 /*
7720  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7721  * @filp: The active open file structure
7722  * @ubuf: The userspace provided buffer to read value into
7723  * @cnt: The maximum number of bytes to read
7724  * @ppos: The current "file" position
7725  *
7726  * This function implements the read interface for a struct trace_min_max_param.
7727  * The filp->private_data must point to a trace_min_max_param struct with valid
7728  * data.
7729  */
7730 static ssize_t
7731 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7732 {
7733         struct trace_min_max_param *param = filp->private_data;
7734         char buf[U64_STR_SIZE];
7735         int len;
7736         u64 val;
7737
7738         if (!param)
7739                 return -EFAULT;
7740
7741         val = *param->val;
7742
7743         if (cnt > sizeof(buf))
7744                 cnt = sizeof(buf);
7745
7746         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7747
7748         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7749 }
7750
7751 const struct file_operations trace_min_max_fops = {
7752         .open           = tracing_open_generic,
7753         .read           = trace_min_max_read,
7754         .write          = trace_min_max_write,
7755 };
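
/*
 * Minimal wiring sketch (illustrative; struct trace_min_max_param is
 * declared elsewhere, and the fields below are simply the ones the two
 * handlers above dereference).  A hypothetical "my_timeout_us" control
 * could be exposed as
 *
 *	static u64 my_timeout_us = 1000;
 *	static u64 my_timeout_min = 1;
 *	static u64 my_timeout_max = 1000000;
 *	static DEFINE_MUTEX(my_timeout_lock);
 *
 *	static struct trace_min_max_param my_timeout_param = {
 *		.lock	= &my_timeout_lock,
 *		.val	= &my_timeout_us,
 *		.min	= &my_timeout_min,
 *		.max	= &my_timeout_max,
 *	};
 *
 * passed as the data argument of trace_create_file() so that
 * filp->private_data points at it in the handlers above.
 */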
7756
7757 #define TRACING_LOG_ERRS_MAX    8
7758 #define TRACING_LOG_LOC_MAX     128
7759
7760 #define CMD_PREFIX "  Command: "
7761
7762 struct err_info {
7763         const char      **errs; /* ptr to loc-specific array of err strings */
7764         u8              type;   /* index into errs -> specific err string */
7765         u16             pos;    /* caret position */
7766         u64             ts;
7767 };
7768
7769 struct tracing_log_err {
7770         struct list_head        list;
7771         struct err_info         info;
7772         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7773         char                    *cmd;                     /* what caused err */
7774 };
7775
7776 static DEFINE_MUTEX(tracing_err_log_lock);
7777
7778 static struct tracing_log_err *alloc_tracing_log_err(int len)
7779 {
7780         struct tracing_log_err *err;
7781
7782         err = kzalloc(sizeof(*err), GFP_KERNEL);
7783         if (!err)
7784                 return ERR_PTR(-ENOMEM);
7785
7786         err->cmd = kzalloc(len, GFP_KERNEL);
7787         if (!err->cmd) {
7788                 kfree(err);
7789                 return ERR_PTR(-ENOMEM);
7790         }
7791
7792         return err;
7793 }
7794
7795 static void free_tracing_log_err(struct tracing_log_err *err)
7796 {
7797         kfree(err->cmd);
7798         kfree(err);
7799 }
7800
7801 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7802                                                    int len)
7803 {
7804         struct tracing_log_err *err;
7805
7806         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7807                 err = alloc_tracing_log_err(len);
7808                 if (PTR_ERR(err) != -ENOMEM)
7809                         tr->n_err_log_entries++;
7810
7811                 return err;
7812         }
7813
7814         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7815         kfree(err->cmd);
7816         err->cmd = kzalloc(len, GFP_KERNEL);
7817         if (!err->cmd)
7818                 return ERR_PTR(-ENOMEM);
7819         list_del(&err->list);
7820
7821         return err;
7822 }
7823
7824 /**
7825  * err_pos - find the position of a string within a command for error careting
7826  * @cmd: The tracing command that caused the error
7827  * @str: The string to position the caret at within @cmd
7828  *
7829  * Finds the position of the first occurrence of @str within @cmd.  The
7830  * return value can be passed to tracing_log_err() for caret placement
7831  * within @cmd.
7832  *
7833  * Returns the index within @cmd of the first occurrence of @str or 0
7834  * if @str was not found.
7835  */
7836 unsigned int err_pos(char *cmd, const char *str)
7837 {
7838         char *found;
7839
7840         if (WARN_ON(!strlen(cmd)))
7841                 return 0;
7842
7843         found = strstr(cmd, str);
7844         if (found)
7845                 return found - cmd;
7846
7847         return 0;
7848 }
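/*
 * For example (illustrative values only): err_pos("hist:keys=foo", "foo")
 * returns 10, which makes the caret in the error log line up under "foo"
 * when the command is echoed back.
 */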
7849
7850 /**
7851  * tracing_log_err - write an error to the tracing error log
7852  * @tr: The associated trace array for the error (NULL for top level array)
7853  * @loc: A string describing where the error occurred
7854  * @cmd: The tracing command that caused the error
7855  * @errs: The array of loc-specific static error strings
7856  * @type: The index into errs[], which produces the specific static err string
7857  * @pos: The position the caret should be placed in the cmd
7858  *
7859  * Writes an error into tracing/error_log of the form:
7860  *
7861  * <loc>: error: <text>
7862  *   Command: <cmd>
7863  *              ^
7864  *
7865  * tracing/error_log is a small log file containing the last
7866  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7867  * unless there has been a tracing error; the error log can be
7868  * cleared and have its memory freed by writing the empty string to it
7869  * in truncation mode, i.e. echo > tracing/error_log.
7870  *
7871  * NOTE: the @errs array along with the @type param are used to
7872  * produce a static error string - this string is not copied and saved
7873  * when the error is logged - only a pointer to it is saved.  See
7874  * existing callers for examples of how static strings are typically
7875  * defined for use with tracing_log_err().
7876  */
7877 void tracing_log_err(struct trace_array *tr,
7878                      const char *loc, const char *cmd,
7879                      const char **errs, u8 type, u16 pos)
7880 {
7881         struct tracing_log_err *err;
7882         int len = 0;
7883
7884         if (!tr)
7885                 tr = &global_trace;
7886
7887         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7888
7889         mutex_lock(&tracing_err_log_lock);
7890         err = get_tracing_log_err(tr, len);
7891         if (PTR_ERR(err) == -ENOMEM) {
7892                 mutex_unlock(&tracing_err_log_lock);
7893                 return;
7894         }
7895
7896         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7897         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7898
7899         err->info.errs = errs;
7900         err->info.type = type;
7901         err->info.pos = pos;
7902         err->info.ts = local_clock();
7903
7904         list_add_tail(&err->list, &tr->err_log);
7905         mutex_unlock(&tracing_err_log_lock);
7906 }
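/*
 * Hedged sketch of a typical caller (not from this file; every name below
 * is hypothetical).  Per the NOTE above, the error strings are static and
 * only a pointer to them is stored, so callers keep a file-scope array
 * indexed by an enum and pass the matching index as @type, with err_pos()
 * supplying the caret position.
 */
#if 0
enum { EXAMPLE_ERR_DUP_NAME, EXAMPLE_ERR_BAD_FIELD };

static const char *example_errs[] = {
	"Duplicate name",
	"Invalid field",
};

static void example_report_bad_field(struct trace_array *tr, char *cmd,
				     const char *field)
{
	tracing_log_err(tr, "example_loc", cmd, example_errs,
			EXAMPLE_ERR_BAD_FIELD, err_pos(cmd, field));
}
#endif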
7907
7908 static void clear_tracing_err_log(struct trace_array *tr)
7909 {
7910         struct tracing_log_err *err, *next;
7911
7912         mutex_lock(&tracing_err_log_lock);
7913         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7914                 list_del(&err->list);
7915                 free_tracing_log_err(err);
7916         }
7917
7918         tr->n_err_log_entries = 0;
7919         mutex_unlock(&tracing_err_log_lock);
7920 }
7921
7922 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7923 {
7924         struct trace_array *tr = m->private;
7925
7926         mutex_lock(&tracing_err_log_lock);
7927
7928         return seq_list_start(&tr->err_log, *pos);
7929 }
7930
7931 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7932 {
7933         struct trace_array *tr = m->private;
7934
7935         return seq_list_next(v, &tr->err_log, pos);
7936 }
7937
7938 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7939 {
7940         mutex_unlock(&tracing_err_log_lock);
7941 }
7942
7943 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7944 {
7945         u16 i;
7946
7947         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7948                 seq_putc(m, ' ');
7949         for (i = 0; i < pos; i++)
7950                 seq_putc(m, ' ');
7951         seq_puts(m, "^\n");
7952 }
7953
7954 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7955 {
7956         struct tracing_log_err *err = v;
7957
7958         if (err) {
7959                 const char *err_text = err->info.errs[err->info.type];
7960                 u64 sec = err->info.ts;
7961                 u32 nsec;
7962
7963                 nsec = do_div(sec, NSEC_PER_SEC);
7964                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7965                            err->loc, err_text);
7966                 seq_printf(m, "%s", err->cmd);
7967                 tracing_err_log_show_pos(m, err->info.pos);
7968         }
7969
7970         return 0;
7971 }
7972
7973 static const struct seq_operations tracing_err_log_seq_ops = {
7974         .start  = tracing_err_log_seq_start,
7975         .next   = tracing_err_log_seq_next,
7976         .stop   = tracing_err_log_seq_stop,
7977         .show   = tracing_err_log_seq_show
7978 };
7979
7980 static int tracing_err_log_open(struct inode *inode, struct file *file)
7981 {
7982         struct trace_array *tr = inode->i_private;
7983         int ret = 0;
7984
7985         ret = tracing_check_open_get_tr(tr);
7986         if (ret)
7987                 return ret;
7988
7989         /* If this file was opened for write, then erase contents */
7990         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7991                 clear_tracing_err_log(tr);
7992
7993         if (file->f_mode & FMODE_READ) {
7994                 ret = seq_open(file, &tracing_err_log_seq_ops);
7995                 if (!ret) {
7996                         struct seq_file *m = file->private_data;
7997                         m->private = tr;
7998                 } else {
7999                         trace_array_put(tr);
8000                 }
8001         }
8002         return ret;
8003 }
8004
8005 static ssize_t tracing_err_log_write(struct file *file,
8006                                      const char __user *buffer,
8007                                      size_t count, loff_t *ppos)
8008 {
8009         return count;
8010 }
8011
8012 static int tracing_err_log_release(struct inode *inode, struct file *file)
8013 {
8014         struct trace_array *tr = inode->i_private;
8015
8016         trace_array_put(tr);
8017
8018         if (file->f_mode & FMODE_READ)
8019                 seq_release(inode, file);
8020
8021         return 0;
8022 }
8023
8024 static const struct file_operations tracing_err_log_fops = {
8025         .open           = tracing_err_log_open,
8026         .write          = tracing_err_log_write,
8027         .read           = seq_read,
8028         .llseek         = seq_lseek,
8029         .release        = tracing_err_log_release,
8030 };
8031
8032 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8033 {
8034         struct trace_array *tr = inode->i_private;
8035         struct ftrace_buffer_info *info;
8036         int ret;
8037
8038         ret = tracing_check_open_get_tr(tr);
8039         if (ret)
8040                 return ret;
8041
8042         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8043         if (!info) {
8044                 trace_array_put(tr);
8045                 return -ENOMEM;
8046         }
8047
8048         mutex_lock(&trace_types_lock);
8049
8050         info->iter.tr           = tr;
8051         info->iter.cpu_file     = tracing_get_cpu(inode);
8052         info->iter.trace        = tr->current_trace;
8053         info->iter.array_buffer = &tr->array_buffer;
8054         info->spare             = NULL;
8055         /* Force reading ring buffer for first read */
8056         info->read              = (unsigned int)-1;
8057
8058         filp->private_data = info;
8059
8060         tr->trace_ref++;
8061
8062         mutex_unlock(&trace_types_lock);
8063
8064         ret = nonseekable_open(inode, filp);
8065         if (ret < 0)
8066                 trace_array_put(tr);
8067
8068         return ret;
8069 }
8070
8071 static __poll_t
8072 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8073 {
8074         struct ftrace_buffer_info *info = filp->private_data;
8075         struct trace_iterator *iter = &info->iter;
8076
8077         return trace_poll(iter, filp, poll_table);
8078 }
8079
8080 static ssize_t
8081 tracing_buffers_read(struct file *filp, char __user *ubuf,
8082                      size_t count, loff_t *ppos)
8083 {
8084         struct ftrace_buffer_info *info = filp->private_data;
8085         struct trace_iterator *iter = &info->iter;
8086         ssize_t ret = 0;
8087         ssize_t size;
8088
8089         if (!count)
8090                 return 0;
8091
8092 #ifdef CONFIG_TRACER_MAX_TRACE
8093         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8094                 return -EBUSY;
8095 #endif
8096
8097         if (!info->spare) {
8098                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8099                                                           iter->cpu_file);
8100                 if (IS_ERR(info->spare)) {
8101                         ret = PTR_ERR(info->spare);
8102                         info->spare = NULL;
8103                 } else {
8104                         info->spare_cpu = iter->cpu_file;
8105                 }
8106         }
8107         if (!info->spare)
8108                 return ret;
8109
8110         /* Do we have previous read data to read? */
8111         if (info->read < PAGE_SIZE)
8112                 goto read;
8113
8114  again:
8115         trace_access_lock(iter->cpu_file);
8116         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8117                                     &info->spare,
8118                                     count,
8119                                     iter->cpu_file, 0);
8120         trace_access_unlock(iter->cpu_file);
8121
8122         if (ret < 0) {
8123                 if (trace_empty(iter)) {
8124                         if ((filp->f_flags & O_NONBLOCK))
8125                                 return -EAGAIN;
8126
8127                         ret = wait_on_pipe(iter, 0);
8128                         if (ret)
8129                                 return ret;
8130
8131                         goto again;
8132                 }
8133                 return 0;
8134         }
8135
8136         info->read = 0;
8137  read:
8138         size = PAGE_SIZE - info->read;
8139         if (size > count)
8140                 size = count;
8141
8142         ret = copy_to_user(ubuf, info->spare + info->read, size);
8143         if (ret == size)
8144                 return -EFAULT;
8145
8146         size -= ret;
8147
8148         *ppos += size;
8149         info->read += size;
8150
8151         return size;
8152 }
8153
8154 static int tracing_buffers_release(struct inode *inode, struct file *file)
8155 {
8156         struct ftrace_buffer_info *info = file->private_data;
8157         struct trace_iterator *iter = &info->iter;
8158
8159         mutex_lock(&trace_types_lock);
8160
8161         iter->tr->trace_ref--;
8162
8163         __trace_array_put(iter->tr);
8164
8165         iter->wait_index++;
8166         /* Make sure the waiters see the new wait_index */
8167         smp_wmb();
8168
8169         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8170
8171         if (info->spare)
8172                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8173                                            info->spare_cpu, info->spare);
8174         kvfree(info);
8175
8176         mutex_unlock(&trace_types_lock);
8177
8178         return 0;
8179 }
8180
8181 struct buffer_ref {
8182         struct trace_buffer     *buffer;
8183         void                    *page;
8184         int                     cpu;
8185         refcount_t              refcount;
8186 };
8187
8188 static void buffer_ref_release(struct buffer_ref *ref)
8189 {
8190         if (!refcount_dec_and_test(&ref->refcount))
8191                 return;
8192         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8193         kfree(ref);
8194 }
8195
8196 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8197                                     struct pipe_buffer *buf)
8198 {
8199         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8200
8201         buffer_ref_release(ref);
8202         buf->private = 0;
8203 }
8204
8205 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8206                                 struct pipe_buffer *buf)
8207 {
8208         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8209
8210         if (refcount_read(&ref->refcount) > INT_MAX/2)
8211                 return false;
8212
8213         refcount_inc(&ref->refcount);
8214         return true;
8215 }
8216
8217 /* Pipe buffer operations for a buffer. */
8218 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8219         .release                = buffer_pipe_buf_release,
8220         .get                    = buffer_pipe_buf_get,
8221 };
8222
8223 /*
8224  * Callback from splice_to_pipe() to release pages at the end of the
8225  * spd if we errored out while filling the pipe.
8226  */
8227 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8228 {
8229         struct buffer_ref *ref =
8230                 (struct buffer_ref *)spd->partial[i].private;
8231
8232         buffer_ref_release(ref);
8233         spd->partial[i].private = 0;
8234 }
8235
8236 static ssize_t
8237 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8238                             struct pipe_inode_info *pipe, size_t len,
8239                             unsigned int flags)
8240 {
8241         struct ftrace_buffer_info *info = file->private_data;
8242         struct trace_iterator *iter = &info->iter;
8243         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8244         struct page *pages_def[PIPE_DEF_BUFFERS];
8245         struct splice_pipe_desc spd = {
8246                 .pages          = pages_def,
8247                 .partial        = partial_def,
8248                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8249                 .ops            = &buffer_pipe_buf_ops,
8250                 .spd_release    = buffer_spd_release,
8251         };
8252         struct buffer_ref *ref;
8253         int entries, i;
8254         ssize_t ret = 0;
8255
8256 #ifdef CONFIG_TRACER_MAX_TRACE
8257         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8258                 return -EBUSY;
8259 #endif
8260
8261         if (*ppos & (PAGE_SIZE - 1))
8262                 return -EINVAL;
8263
8264         if (len & (PAGE_SIZE - 1)) {
8265                 if (len < PAGE_SIZE)
8266                         return -EINVAL;
8267                 len &= PAGE_MASK;
8268         }
8269
8270         if (splice_grow_spd(pipe, &spd))
8271                 return -ENOMEM;
8272
8273  again:
8274         trace_access_lock(iter->cpu_file);
8275         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8276
8277         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8278                 struct page *page;
8279                 int r;
8280
8281                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8282                 if (!ref) {
8283                         ret = -ENOMEM;
8284                         break;
8285                 }
8286
8287                 refcount_set(&ref->refcount, 1);
8288                 ref->buffer = iter->array_buffer->buffer;
8289                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8290                 if (IS_ERR(ref->page)) {
8291                         ret = PTR_ERR(ref->page);
8292                         ref->page = NULL;
8293                         kfree(ref);
8294                         break;
8295                 }
8296                 ref->cpu = iter->cpu_file;
8297
8298                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8299                                           len, iter->cpu_file, 1);
8300                 if (r < 0) {
8301                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8302                                                    ref->page);
8303                         kfree(ref);
8304                         break;
8305                 }
8306
8307                 page = virt_to_page(ref->page);
8308
8309                 spd.pages[i] = page;
8310                 spd.partial[i].len = PAGE_SIZE;
8311                 spd.partial[i].offset = 0;
8312                 spd.partial[i].private = (unsigned long)ref;
8313                 spd.nr_pages++;
8314                 *ppos += PAGE_SIZE;
8315
8316                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8317         }
8318
8319         trace_access_unlock(iter->cpu_file);
8320         spd.nr_pages = i;
8321
8322         /* did we read anything? */
8323         if (!spd.nr_pages) {
8324                 long wait_index;
8325
8326                 if (ret)
8327                         goto out;
8328
8329                 ret = -EAGAIN;
8330                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8331                         goto out;
8332
8333                 wait_index = READ_ONCE(iter->wait_index);
8334
8335                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8336                 if (ret)
8337                         goto out;
8338
8339                 /* No need to wait after waking up when tracing is off */
8340                 if (!tracer_tracing_is_on(iter->tr))
8341                         goto out;
8342
8343                 /* Make sure we see the new wait_index */
8344                 smp_rmb();
8345                 if (wait_index != iter->wait_index)
8346                         goto out;
8347
8348                 goto again;
8349         }
8350
8351         ret = splice_to_pipe(pipe, &spd);
8352 out:
8353         splice_shrink_spd(&spd);
8354
8355         return ret;
8356 }
8357
8358 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8359 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8360 {
8361         struct ftrace_buffer_info *info = file->private_data;
8362         struct trace_iterator *iter = &info->iter;
8363
8364         if (cmd)
8365                 return -ENOIOCTLCMD;
8366
8367         mutex_lock(&trace_types_lock);
8368
8369         iter->wait_index++;
8370         /* Make sure the waiters see the new wait_index */
8371         smp_wmb();
8372
8373         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8374
8375         mutex_unlock(&trace_types_lock);
8376         return 0;
8377 }
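/*
 * Hedged user-space illustration (not part of this file): any task that
 * opens the same per-cpu trace_pipe_raw file can issue the cmd-0 ioctl
 * described above to wake up waiters blocked on that CPU's ring buffer.
 * The path assumes tracefs is mounted at /sys/kernel/tracing.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	if (fd >= 0) {
 *		ioctl(fd, 0);	<-- cmd 0: wake up all waiters
 *		close(fd);
 *	}
 */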
8378
8379 static const struct file_operations tracing_buffers_fops = {
8380         .open           = tracing_buffers_open,
8381         .read           = tracing_buffers_read,
8382         .poll           = tracing_buffers_poll,
8383         .release        = tracing_buffers_release,
8384         .splice_read    = tracing_buffers_splice_read,
8385         .unlocked_ioctl = tracing_buffers_ioctl,
8386         .llseek         = no_llseek,
8387 };
8388
8389 static ssize_t
8390 tracing_stats_read(struct file *filp, char __user *ubuf,
8391                    size_t count, loff_t *ppos)
8392 {
8393         struct inode *inode = file_inode(filp);
8394         struct trace_array *tr = inode->i_private;
8395         struct array_buffer *trace_buf = &tr->array_buffer;
8396         int cpu = tracing_get_cpu(inode);
8397         struct trace_seq *s;
8398         unsigned long cnt;
8399         unsigned long long t;
8400         unsigned long usec_rem;
8401
8402         s = kmalloc(sizeof(*s), GFP_KERNEL);
8403         if (!s)
8404                 return -ENOMEM;
8405
8406         trace_seq_init(s);
8407
8408         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8409         trace_seq_printf(s, "entries: %ld\n", cnt);
8410
8411         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8412         trace_seq_printf(s, "overrun: %ld\n", cnt);
8413
8414         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8415         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8416
8417         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8418         trace_seq_printf(s, "bytes: %ld\n", cnt);
8419
8420         if (trace_clocks[tr->clock_id].in_ns) {
8421                 /* local or global for trace_clock */
8422                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8423                 usec_rem = do_div(t, USEC_PER_SEC);
8424                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8425                                                                 t, usec_rem);
8426
8427                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8428                 usec_rem = do_div(t, USEC_PER_SEC);
8429                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8430         } else {
8431                 /* counter or tsc mode for trace_clock */
8432                 trace_seq_printf(s, "oldest event ts: %llu\n",
8433                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8434
8435                 trace_seq_printf(s, "now ts: %llu\n",
8436                                 ring_buffer_time_stamp(trace_buf->buffer));
8437         }
8438
8439         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8440         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8441
8442         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8443         trace_seq_printf(s, "read events: %ld\n", cnt);
8444
8445         count = simple_read_from_buffer(ubuf, count, ppos,
8446                                         s->buffer, trace_seq_used(s));
8447
8448         kfree(s);
8449
8450         return count;
8451 }
8452
8453 static const struct file_operations tracing_stats_fops = {
8454         .open           = tracing_open_generic_tr,
8455         .read           = tracing_stats_read,
8456         .llseek         = generic_file_llseek,
8457         .release        = tracing_release_generic_tr,
8458 };
8459
8460 #ifdef CONFIG_DYNAMIC_FTRACE
8461
8462 static ssize_t
8463 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8464                   size_t cnt, loff_t *ppos)
8465 {
8466         ssize_t ret;
8467         char *buf;
8468         int r;
8469
8470         /* 256 should be plenty to hold the amount needed */
8471         buf = kmalloc(256, GFP_KERNEL);
8472         if (!buf)
8473                 return -ENOMEM;
8474
8475         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8476                       ftrace_update_tot_cnt,
8477                       ftrace_number_of_pages,
8478                       ftrace_number_of_groups);
8479
8480         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8481         kfree(buf);
8482         return ret;
8483 }
8484
8485 static const struct file_operations tracing_dyn_info_fops = {
8486         .open           = tracing_open_generic,
8487         .read           = tracing_read_dyn_info,
8488         .llseek         = generic_file_llseek,
8489 };
8490 #endif /* CONFIG_DYNAMIC_FTRACE */
8491
8492 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8493 static void
8494 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8495                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8496                 void *data)
8497 {
8498         tracing_snapshot_instance(tr);
8499 }
8500
8501 static void
8502 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8503                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8504                       void *data)
8505 {
8506         struct ftrace_func_mapper *mapper = data;
8507         long *count = NULL;
8508
8509         if (mapper)
8510                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8511
8512         if (count) {
8513
8514                 if (*count <= 0)
8515                         return;
8516
8517                 (*count)--;
8518         }
8519
8520         tracing_snapshot_instance(tr);
8521 }
8522
8523 static int
8524 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8525                       struct ftrace_probe_ops *ops, void *data)
8526 {
8527         struct ftrace_func_mapper *mapper = data;
8528         long *count = NULL;
8529
8530         seq_printf(m, "%ps:", (void *)ip);
8531
8532         seq_puts(m, "snapshot");
8533
8534         if (mapper)
8535                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8536
8537         if (count)
8538                 seq_printf(m, ":count=%ld\n", *count);
8539         else
8540                 seq_puts(m, ":unlimited\n");
8541
8542         return 0;
8543 }
8544
8545 static int
8546 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8547                      unsigned long ip, void *init_data, void **data)
8548 {
8549         struct ftrace_func_mapper *mapper = *data;
8550
8551         if (!mapper) {
8552                 mapper = allocate_ftrace_func_mapper();
8553                 if (!mapper)
8554                         return -ENOMEM;
8555                 *data = mapper;
8556         }
8557
8558         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8559 }
8560
8561 static void
8562 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8563                      unsigned long ip, void *data)
8564 {
8565         struct ftrace_func_mapper *mapper = data;
8566
8567         if (!ip) {
8568                 if (!mapper)
8569                         return;
8570                 free_ftrace_func_mapper(mapper, NULL);
8571                 return;
8572         }
8573
8574         ftrace_func_mapper_remove_ip(mapper, ip);
8575 }
8576
8577 static struct ftrace_probe_ops snapshot_probe_ops = {
8578         .func                   = ftrace_snapshot,
8579         .print                  = ftrace_snapshot_print,
8580 };
8581
8582 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8583         .func                   = ftrace_count_snapshot,
8584         .print                  = ftrace_snapshot_print,
8585         .init                   = ftrace_snapshot_init,
8586         .free                   = ftrace_snapshot_free,
8587 };
8588
8589 static int
8590 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8591                                char *glob, char *cmd, char *param, int enable)
8592 {
8593         struct ftrace_probe_ops *ops;
8594         void *count = (void *)-1;
8595         char *number;
8596         int ret;
8597
8598         if (!tr)
8599                 return -ENODEV;
8600
8601         /* hash funcs only work with set_ftrace_filter */
8602         if (!enable)
8603                 return -EINVAL;
8604
8605         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8606
8607         if (glob[0] == '!')
8608                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8609
8610         if (!param)
8611                 goto out_reg;
8612
8613         number = strsep(&param, ":");
8614
8615         if (!strlen(number))
8616                 goto out_reg;
8617
8618         /*
8619          * We use the callback data field (which is a pointer)
8620          * as our counter.
8621          */
8622         ret = kstrtoul(number, 0, (unsigned long *)&count);
8623         if (ret)
8624                 return ret;
8625
8626  out_reg:
8627         ret = tracing_alloc_snapshot_instance(tr);
8628         if (ret < 0)
8629                 goto out;
8630
8631         ret = register_ftrace_function_probe(glob, tr, ops, count);
8632
8633  out:
8634         return ret < 0 ? ret : 0;
8635 }
8636
8637 static struct ftrace_func_command ftrace_snapshot_cmd = {
8638         .name                   = "snapshot",
8639         .func                   = ftrace_trace_snapshot_callback,
8640 };
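/*
 * Illustrative usage of the "snapshot" function command registered below
 * (shell examples; paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *	# take a snapshot every time schedule() is hit:
 *	echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# only take the first 5 snapshots:
 *	echo 'schedule:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# remove the probe:
 *	echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */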
8641
8642 static __init int register_snapshot_cmd(void)
8643 {
8644         return register_ftrace_command(&ftrace_snapshot_cmd);
8645 }
8646 #else
8647 static inline __init int register_snapshot_cmd(void) { return 0; }
8648 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8649
8650 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8651 {
8652         if (WARN_ON(!tr->dir))
8653                 return ERR_PTR(-ENODEV);
8654
8655         /* Top directory uses NULL as the parent */
8656         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8657                 return NULL;
8658
8659         /* All sub buffers have a descriptor */
8660         return tr->dir;
8661 }
8662
8663 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8664 {
8665         struct dentry *d_tracer;
8666
8667         if (tr->percpu_dir)
8668                 return tr->percpu_dir;
8669
8670         d_tracer = tracing_get_dentry(tr);
8671         if (IS_ERR(d_tracer))
8672                 return NULL;
8673
8674         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8675
8676         MEM_FAIL(!tr->percpu_dir,
8677                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8678
8679         return tr->percpu_dir;
8680 }
8681
8682 static struct dentry *
8683 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8684                       void *data, long cpu, const struct file_operations *fops)
8685 {
8686         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8687
8688         if (ret) /* See tracing_get_cpu() */
8689                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8690         return ret;
8691 }
8692
8693 static void
8694 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8695 {
8696         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8697         struct dentry *d_cpu;
8698         char cpu_dir[30]; /* 30 characters should be more than enough */
8699
8700         if (!d_percpu)
8701                 return;
8702
8703         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8704         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8705         if (!d_cpu) {
8706                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8707                 return;
8708         }
8709
8710         /* per cpu trace_pipe */
8711         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8712                                 tr, cpu, &tracing_pipe_fops);
8713
8714         /* per cpu trace */
8715         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8716                                 tr, cpu, &tracing_fops);
8717
8718         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8719                                 tr, cpu, &tracing_buffers_fops);
8720
8721         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8722                                 tr, cpu, &tracing_stats_fops);
8723
8724         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8725                                 tr, cpu, &tracing_entries_fops);
8726
8727 #ifdef CONFIG_TRACER_SNAPSHOT
8728         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8729                                 tr, cpu, &snapshot_fops);
8730
8731         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8732                                 tr, cpu, &snapshot_raw_fops);
8733 #endif
8734 }
8735
8736 #ifdef CONFIG_FTRACE_SELFTEST
8737 /* Let selftest have access to static functions in this file */
8738 #include "trace_selftest.c"
8739 #endif
8740
8741 static ssize_t
8742 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8743                         loff_t *ppos)
8744 {
8745         struct trace_option_dentry *topt = filp->private_data;
8746         char *buf;
8747
8748         if (topt->flags->val & topt->opt->bit)
8749                 buf = "1\n";
8750         else
8751                 buf = "0\n";
8752
8753         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8754 }
8755
8756 static ssize_t
8757 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8758                          loff_t *ppos)
8759 {
8760         struct trace_option_dentry *topt = filp->private_data;
8761         unsigned long val;
8762         int ret;
8763
8764         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8765         if (ret)
8766                 return ret;
8767
8768         if (val != 0 && val != 1)
8769                 return -EINVAL;
8770
8771         if (!!(topt->flags->val & topt->opt->bit) != val) {
8772                 mutex_lock(&trace_types_lock);
8773                 ret = __set_tracer_option(topt->tr, topt->flags,
8774                                           topt->opt, !val);
8775                 mutex_unlock(&trace_types_lock);
8776                 if (ret)
8777                         return ret;
8778         }
8779
8780         *ppos += cnt;
8781
8782         return cnt;
8783 }
8784
8785
8786 static const struct file_operations trace_options_fops = {
8787         .open = tracing_open_generic,
8788         .read = trace_options_read,
8789         .write = trace_options_write,
8790         .llseek = generic_file_llseek,
8791 };
8792
8793 /*
8794  * In order to pass in both the trace_array descriptor as well as the index
8795  * to the flag that the trace option file represents, the trace_array
8796  * has a character array of trace_flags_index[], which holds the index
8797  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8798  * The address of this character array is passed to the flag option file
8799  * read/write callbacks.
8800  *
8801  * In order to extract both the index and the trace_array descriptor,
8802  * get_tr_index() uses the following algorithm.
8803  *
8804  *   idx = *ptr;
8805  *
8806  * The pointer itself holds the address of its own slot in the index
8807  * array (remember, index[1] == 1), so dereferencing it yields the
8808  * flag index.
8809  *
8810  * Subtracting that index from the pointer then lands on the start
8811  * of the array:
8812  *
8813  *   ptr - idx == &index[0]
8814  *
8815  * and container_of() from that address recovers the trace_array.
8816  */
8817 static void get_tr_index(void *data, struct trace_array **ptr,
8818                          unsigned int *pindex)
8819 {
8820         *pindex = *(unsigned char *)data;
8821
8822         *ptr = container_of(data - *pindex, struct trace_array,
8823                             trace_flags_index);
8824 }
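/*
 * Concrete instance (illustrative): the option file for flag bit 3 has
 * filp->private_data pointing at &tr->trace_flags_index[3], which stores
 * the value 3; get_tr_index() reads that 3, steps back three bytes to
 * &tr->trace_flags_index[0], and container_of() recovers tr from there.
 */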
8825
8826 static ssize_t
8827 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8828                         loff_t *ppos)
8829 {
8830         void *tr_index = filp->private_data;
8831         struct trace_array *tr;
8832         unsigned int index;
8833         char *buf;
8834
8835         get_tr_index(tr_index, &tr, &index);
8836
8837         if (tr->trace_flags & (1 << index))
8838                 buf = "1\n";
8839         else
8840                 buf = "0\n";
8841
8842         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8843 }
8844
8845 static ssize_t
8846 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8847                          loff_t *ppos)
8848 {
8849         void *tr_index = filp->private_data;
8850         struct trace_array *tr;
8851         unsigned int index;
8852         unsigned long val;
8853         int ret;
8854
8855         get_tr_index(tr_index, &tr, &index);
8856
8857         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8858         if (ret)
8859                 return ret;
8860
8861         if (val != 0 && val != 1)
8862                 return -EINVAL;
8863
8864         mutex_lock(&event_mutex);
8865         mutex_lock(&trace_types_lock);
8866         ret = set_tracer_flag(tr, 1 << index, val);
8867         mutex_unlock(&trace_types_lock);
8868         mutex_unlock(&event_mutex);
8869
8870         if (ret < 0)
8871                 return ret;
8872
8873         *ppos += cnt;
8874
8875         return cnt;
8876 }
8877
8878 static const struct file_operations trace_options_core_fops = {
8879         .open = tracing_open_generic,
8880         .read = trace_options_core_read,
8881         .write = trace_options_core_write,
8882         .llseek = generic_file_llseek,
8883 };
8884
8885 struct dentry *trace_create_file(const char *name,
8886                                  umode_t mode,
8887                                  struct dentry *parent,
8888                                  void *data,
8889                                  const struct file_operations *fops)
8890 {
8891         struct dentry *ret;
8892
8893         ret = tracefs_create_file(name, mode, parent, data, fops);
8894         if (!ret)
8895                 pr_warn("Could not create tracefs '%s' entry\n", name);
8896
8897         return ret;
8898 }
8899
8900
8901 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8902 {
8903         struct dentry *d_tracer;
8904
8905         if (tr->options)
8906                 return tr->options;
8907
8908         d_tracer = tracing_get_dentry(tr);
8909         if (IS_ERR(d_tracer))
8910                 return NULL;
8911
8912         tr->options = tracefs_create_dir("options", d_tracer);
8913         if (!tr->options) {
8914                 pr_warn("Could not create tracefs directory 'options'\n");
8915                 return NULL;
8916         }
8917
8918         return tr->options;
8919 }
8920
8921 static void
8922 create_trace_option_file(struct trace_array *tr,
8923                          struct trace_option_dentry *topt,
8924                          struct tracer_flags *flags,
8925                          struct tracer_opt *opt)
8926 {
8927         struct dentry *t_options;
8928
8929         t_options = trace_options_init_dentry(tr);
8930         if (!t_options)
8931                 return;
8932
8933         topt->flags = flags;
8934         topt->opt = opt;
8935         topt->tr = tr;
8936
8937         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8938                                         t_options, topt, &trace_options_fops);
8939
8940 }
8941
8942 static void
8943 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8944 {
8945         struct trace_option_dentry *topts;
8946         struct trace_options *tr_topts;
8947         struct tracer_flags *flags;
8948         struct tracer_opt *opts;
8949         int cnt;
8950         int i;
8951
8952         if (!tracer)
8953                 return;
8954
8955         flags = tracer->flags;
8956
8957         if (!flags || !flags->opts)
8958                 return;
8959
8960         /*
8961          * If this is an instance, only create flags for tracers
8962          * the instance may have.
8963          */
8964         if (!trace_ok_for_array(tracer, tr))
8965                 return;
8966
8967         for (i = 0; i < tr->nr_topts; i++) {
8968                 /* Make sure there are no duplicate flags. */
8969                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8970                         return;
8971         }
8972
8973         opts = flags->opts;
8974
8975         for (cnt = 0; opts[cnt].name; cnt++)
8976                 ;
8977
8978         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8979         if (!topts)
8980                 return;
8981
8982         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8983                             GFP_KERNEL);
8984         if (!tr_topts) {
8985                 kfree(topts);
8986                 return;
8987         }
8988
8989         tr->topts = tr_topts;
8990         tr->topts[tr->nr_topts].tracer = tracer;
8991         tr->topts[tr->nr_topts].topts = topts;
8992         tr->nr_topts++;
8993
8994         for (cnt = 0; opts[cnt].name; cnt++) {
8995                 create_trace_option_file(tr, &topts[cnt], flags,
8996                                          &opts[cnt]);
8997                 MEM_FAIL(topts[cnt].entry == NULL,
8998                           "Failed to create trace option: %s",
8999                           opts[cnt].name);
9000         }
9001 }
9002
9003 static struct dentry *
9004 create_trace_option_core_file(struct trace_array *tr,
9005                               const char *option, long index)
9006 {
9007         struct dentry *t_options;
9008
9009         t_options = trace_options_init_dentry(tr);
9010         if (!t_options)
9011                 return NULL;
9012
9013         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9014                                  (void *)&tr->trace_flags_index[index],
9015                                  &trace_options_core_fops);
9016 }
9017
9018 static void create_trace_options_dir(struct trace_array *tr)
9019 {
9020         struct dentry *t_options;
9021         bool top_level = tr == &global_trace;
9022         int i;
9023
9024         t_options = trace_options_init_dentry(tr);
9025         if (!t_options)
9026                 return;
9027
9028         for (i = 0; trace_options[i]; i++) {
9029                 if (top_level ||
9030                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9031                         create_trace_option_core_file(tr, trace_options[i], i);
9032         }
9033 }
9034
9035 static ssize_t
9036 rb_simple_read(struct file *filp, char __user *ubuf,
9037                size_t cnt, loff_t *ppos)
9038 {
9039         struct trace_array *tr = filp->private_data;
9040         char buf[64];
9041         int r;
9042
9043         r = tracer_tracing_is_on(tr);
9044         r = sprintf(buf, "%d\n", r);
9045
9046         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9047 }
9048
9049 static ssize_t
9050 rb_simple_write(struct file *filp, const char __user *ubuf,
9051                 size_t cnt, loff_t *ppos)
9052 {
9053         struct trace_array *tr = filp->private_data;
9054         struct trace_buffer *buffer = tr->array_buffer.buffer;
9055         unsigned long val;
9056         int ret;
9057
9058         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9059         if (ret)
9060                 return ret;
9061
9062         if (buffer) {
9063                 mutex_lock(&trace_types_lock);
9064                 if (!!val == tracer_tracing_is_on(tr)) {
9065                         val = 0; /* do nothing */
9066                 } else if (val) {
9067                         tracer_tracing_on(tr);
9068                         if (tr->current_trace->start)
9069                                 tr->current_trace->start(tr);
9070                 } else {
9071                         tracer_tracing_off(tr);
9072                         if (tr->current_trace->stop)
9073                                 tr->current_trace->stop(tr);
9074                         /* Wake up any waiters */
9075                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9076                 }
9077                 mutex_unlock(&trace_types_lock);
9078         }
9079
9080         (*ppos)++;
9081
9082         return cnt;
9083 }
9084
9085 static const struct file_operations rb_simple_fops = {
9086         .open           = tracing_open_generic_tr,
9087         .read           = rb_simple_read,
9088         .write          = rb_simple_write,
9089         .release        = tracing_release_generic_tr,
9090         .llseek         = default_llseek,
9091 };
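/*
 * In mainline these operations back the per-instance "tracing_on" file,
 * so recording can be paused and resumed from the shell (assuming
 * tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 */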
9092
9093 static ssize_t
9094 buffer_percent_read(struct file *filp, char __user *ubuf,
9095                     size_t cnt, loff_t *ppos)
9096 {
9097         struct trace_array *tr = filp->private_data;
9098         char buf[64];
9099         int r;
9100
9101         r = tr->buffer_percent;
9102         r = sprintf(buf, "%d\n", r);
9103
9104         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9105 }
9106
9107 static ssize_t
9108 buffer_percent_write(struct file *filp, const char __user *ubuf,
9109                      size_t cnt, loff_t *ppos)
9110 {
9111         struct trace_array *tr = filp->private_data;
9112         unsigned long val;
9113         int ret;
9114
9115         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9116         if (ret)
9117                 return ret;
9118
9119         if (val > 100)
9120                 return -EINVAL;
9121
9122         if (!val)
9123                 val = 1;
9124
9125         tr->buffer_percent = val;
9126
9127         (*ppos)++;
9128
9129         return cnt;
9130 }
9131
9132 static const struct file_operations buffer_percent_fops = {
9133         .open           = tracing_open_generic_tr,
9134         .read           = buffer_percent_read,
9135         .write          = buffer_percent_write,
9136         .release        = tracing_release_generic_tr,
9137         .llseek         = default_llseek,
9138 };
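/*
 * In mainline these operations back the per-instance "buffer_percent"
 * file: the value sets how full the ring buffer must be before readers
 * blocked in wait_on_pipe() (see tracing_buffers_splice_read() above) are
 * woken.  As enforced in buffer_percent_write(), a write of 0 is bumped
 * to 1 and anything above 100 is rejected with -EINVAL.
 */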
9139
9140 static struct dentry *trace_instance_dir;
9141
9142 static void
9143 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9144
9145 static int
9146 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9147 {
9148         enum ring_buffer_flags rb_flags;
9149
9150         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9151
9152         buf->tr = tr;
9153
9154         buf->buffer = ring_buffer_alloc(size, rb_flags);
9155         if (!buf->buffer)
9156                 return -ENOMEM;
9157
9158         buf->data = alloc_percpu(struct trace_array_cpu);
9159         if (!buf->data) {
9160                 ring_buffer_free(buf->buffer);
9161                 buf->buffer = NULL;
9162                 return -ENOMEM;
9163         }
9164
9165         /* Allocate the first page for all buffers */
9166         set_buffer_entries(&tr->array_buffer,
9167                            ring_buffer_size(tr->array_buffer.buffer, 0));
9168
9169         return 0;
9170 }
9171
9172 static void free_trace_buffer(struct array_buffer *buf)
9173 {
9174         if (buf->buffer) {
9175                 ring_buffer_free(buf->buffer);
9176                 buf->buffer = NULL;
9177                 free_percpu(buf->data);
9178                 buf->data = NULL;
9179         }
9180 }
9181
9182 static int allocate_trace_buffers(struct trace_array *tr, int size)
9183 {
9184         int ret;
9185
9186         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9187         if (ret)
9188                 return ret;
9189
9190 #ifdef CONFIG_TRACER_MAX_TRACE
9191         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9192                                     allocate_snapshot ? size : 1);
9193         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9194                 free_trace_buffer(&tr->array_buffer);
9195                 return -ENOMEM;
9196         }
9197         tr->allocated_snapshot = allocate_snapshot;
9198
9199         /*
9200          * Only the top level trace array gets its snapshot allocated
9201          * from the kernel command line.
9202          */
9203         allocate_snapshot = false;
9204 #endif
9205
9206         return 0;
9207 }
9208
9209 static void free_trace_buffers(struct trace_array *tr)
9210 {
9211         if (!tr)
9212                 return;
9213
9214         free_trace_buffer(&tr->array_buffer);
9215
9216 #ifdef CONFIG_TRACER_MAX_TRACE
9217         free_trace_buffer(&tr->max_buffer);
9218 #endif
9219 }
9220
9221 static void init_trace_flags_index(struct trace_array *tr)
9222 {
9223         int i;
9224
9225         /* Used by the trace options files */
9226         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9227                 tr->trace_flags_index[i] = i;
9228 }
9229
9230 static void __update_tracer_options(struct trace_array *tr)
9231 {
9232         struct tracer *t;
9233
9234         for (t = trace_types; t; t = t->next)
9235                 add_tracer_options(tr, t);
9236 }
9237
9238 static void update_tracer_options(struct trace_array *tr)
9239 {
9240         mutex_lock(&trace_types_lock);
9241         tracer_options_updated = true;
9242         __update_tracer_options(tr);
9243         mutex_unlock(&trace_types_lock);
9244 }
9245
9246 /* Must have trace_types_lock held */
9247 struct trace_array *trace_array_find(const char *instance)
9248 {
9249         struct trace_array *tr, *found = NULL;
9250
9251         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9252                 if (tr->name && strcmp(tr->name, instance) == 0) {
9253                         found = tr;
9254                         break;
9255                 }
9256         }
9257
9258         return found;
9259 }
9260
9261 struct trace_array *trace_array_find_get(const char *instance)
9262 {
9263         struct trace_array *tr;
9264
9265         mutex_lock(&trace_types_lock);
9266         tr = trace_array_find(instance);
9267         if (tr)
9268                 tr->ref++;
9269         mutex_unlock(&trace_types_lock);
9270
9271         return tr;
9272 }
9273
9274 static int trace_array_create_dir(struct trace_array *tr)
9275 {
9276         int ret;
9277
9278         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9279         if (!tr->dir)
9280                 return -EINVAL;
9281
9282         ret = event_trace_add_tracer(tr->dir, tr);
9283         if (ret) {
9284                 tracefs_remove(tr->dir);
9285                 return ret;
9286         }
9287
9288         init_tracer_tracefs(tr, tr->dir);
9289         __update_tracer_options(tr);
9290
9291         return ret;
9292 }
9293
9294 static struct trace_array *trace_array_create(const char *name)
9295 {
9296         struct trace_array *tr;
9297         int ret;
9298
9299         ret = -ENOMEM;
9300         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9301         if (!tr)
9302                 return ERR_PTR(ret);
9303
9304         tr->name = kstrdup(name, GFP_KERNEL);
9305         if (!tr->name)
9306                 goto out_free_tr;
9307
9308         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9309                 goto out_free_tr;
9310
9311         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9312
9313         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9314
9315         raw_spin_lock_init(&tr->start_lock);
9316
9317         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9318
9319         tr->current_trace = &nop_trace;
9320
9321         INIT_LIST_HEAD(&tr->systems);
9322         INIT_LIST_HEAD(&tr->events);
9323         INIT_LIST_HEAD(&tr->hist_vars);
9324         INIT_LIST_HEAD(&tr->err_log);
9325
9326         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9327                 goto out_free_tr;
9328
9329         if (ftrace_allocate_ftrace_ops(tr) < 0)
9330                 goto out_free_tr;
9331
9332         ftrace_init_trace_array(tr);
9333
9334         init_trace_flags_index(tr);
9335
9336         if (trace_instance_dir) {
9337                 ret = trace_array_create_dir(tr);
9338                 if (ret)
9339                         goto out_free_tr;
9340         } else
9341                 __trace_early_add_events(tr);
9342
9343         list_add(&tr->list, &ftrace_trace_arrays);
9344
9345         tr->ref++;
9346
9347         return tr;
9348
9349  out_free_tr:
9350         ftrace_free_ftrace_ops(tr);
9351         free_trace_buffers(tr);
9352         free_cpumask_var(tr->tracing_cpumask);
9353         kfree(tr->name);
9354         kfree(tr);
9355
9356         return ERR_PTR(ret);
9357 }
9358
9359 static int instance_mkdir(const char *name)
9360 {
9361         struct trace_array *tr;
9362         int ret;
9363
9364         mutex_lock(&event_mutex);
9365         mutex_lock(&trace_types_lock);
9366
9367         ret = -EEXIST;
9368         if (trace_array_find(name))
9369                 goto out_unlock;
9370
9371         tr = trace_array_create(name);
9372
9373         ret = PTR_ERR_OR_ZERO(tr);
9374
9375 out_unlock:
9376         mutex_unlock(&trace_types_lock);
9377         mutex_unlock(&event_mutex);
9378         return ret;
9379 }
9380
9381 /**
9382  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9383  * @name: The name of the trace array to be looked up/created.
9384  *
9385  * Returns a pointer to the trace array with the given name, or
9386  * NULL if it cannot be created.
9387  *
9388  * NOTE: This function increments the reference counter associated with the
9389  * trace array returned. This makes sure it cannot be freed while in use.
9390  * Use trace_array_put() once the trace array is no longer needed.
9391  * If the trace_array is to be freed, trace_array_destroy() needs to
9392  * be called after the trace_array_put(), or simply let user space delete
9393  * it from the tracefs instances directory. But until the
9394  * trace_array_put() is called, user space can not delete it.
9395  * trace_array_put() is called, user space cannot delete it.
9396  */
9397 struct trace_array *trace_array_get_by_name(const char *name)
9398 {
9399         struct trace_array *tr;
9400
9401         mutex_lock(&event_mutex);
9402         mutex_lock(&trace_types_lock);
9403
9404         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9405                 if (tr->name && strcmp(tr->name, name) == 0)
9406                         goto out_unlock;
9407         }
9408
9409         tr = trace_array_create(name);
9410
9411         if (IS_ERR(tr))
9412                 tr = NULL;
9413 out_unlock:
9414         if (tr)
9415                 tr->ref++;
9416
9417         mutex_unlock(&trace_types_lock);
9418         mutex_unlock(&event_mutex);
9419         return tr;
9420 }
9421 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
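/*
 * Hedged module-side sketch (hypothetical names): looking up or creating
 * a named instance and dropping the reference when done.  Destroying the
 * instance is optional and only valid after the matching trace_array_put(),
 * per the kernel-doc above.
 */
#if 0
static void example_use_instance(void)
{
	struct trace_array *tr = trace_array_get_by_name("example");

	if (!tr)
		return;

	/* ... direct events at the instance, e.g. trace_array_set_clr_event() ... */

	trace_array_put(tr);
	/* trace_array_destroy(tr); -- only if the instance should go away */
}
#endif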
9422
9423 static int __remove_instance(struct trace_array *tr)
9424 {
9425         int i;
9426
9427         /* Reference counter for a newly created trace array = 1. */
9428         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9429                 return -EBUSY;
9430
9431         list_del(&tr->list);
9432
9433         /* Disable all the flags that were enabled coming in */
9434         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9435                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9436                         set_tracer_flag(tr, 1 << i, 0);
9437         }
9438
9439         tracing_set_nop(tr);
9440         clear_ftrace_function_probes(tr);
9441         event_trace_del_tracer(tr);
9442         ftrace_clear_pids(tr);
9443         ftrace_destroy_function_files(tr);
9444         tracefs_remove(tr->dir);
9445         free_percpu(tr->last_func_repeats);
9446         free_trace_buffers(tr);
9447
9448         for (i = 0; i < tr->nr_topts; i++) {
9449                 kfree(tr->topts[i].topts);
9450         }
9451         kfree(tr->topts);
9452
9453         free_cpumask_var(tr->tracing_cpumask);
9454         kfree(tr->name);
9455         kfree(tr);
9456
9457         return 0;
9458 }
9459
9460 int trace_array_destroy(struct trace_array *this_tr)
9461 {
9462         struct trace_array *tr;
9463         int ret;
9464
9465         if (!this_tr)
9466                 return -EINVAL;
9467
9468         mutex_lock(&event_mutex);
9469         mutex_lock(&trace_types_lock);
9470
9471         ret = -ENODEV;
9472
9473         /* Make sure the trace array exists before destroying it. */
9474         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9475                 if (tr == this_tr) {
9476                         ret = __remove_instance(tr);
9477                         break;
9478                 }
9479         }
9480
9481         mutex_unlock(&trace_types_lock);
9482         mutex_unlock(&event_mutex);
9483
9484         return ret;
9485 }
9486 EXPORT_SYMBOL_GPL(trace_array_destroy);
9487
9488 static int instance_rmdir(const char *name)
9489 {
9490         struct trace_array *tr;
9491         int ret;
9492
9493         mutex_lock(&event_mutex);
9494         mutex_lock(&trace_types_lock);
9495
9496         ret = -ENODEV;
9497         tr = trace_array_find(name);
9498         if (tr)
9499                 ret = __remove_instance(tr);
9500
9501         mutex_unlock(&trace_types_lock);
9502         mutex_unlock(&event_mutex);
9503
9504         return ret;
9505 }
9506
9507 static __init void create_trace_instances(struct dentry *d_tracer)
9508 {
9509         struct trace_array *tr;
9510
9511         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9512                                                          instance_mkdir,
9513                                                          instance_rmdir);
9514         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9515                 return;
9516
9517         mutex_lock(&event_mutex);
9518         mutex_lock(&trace_types_lock);
9519
9520         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9521                 if (!tr->name)
9522                         continue;
9523                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9524                              "Failed to create instance directory\n"))
9525                         break;
9526         }
9527
9528         mutex_unlock(&trace_types_lock);
9529         mutex_unlock(&event_mutex);
9530 }
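
/*
 * Editor's note (usage sketch, not from this file): the mkdir/rmdir
 * callbacks registered above back the user-space interface, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *   mkdir /sys/kernel/tracing/instances/foo   # -> instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo   # -> instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance still holds references
 * (see __remove_instance() above).
 */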
9531
9532 static void
9533 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9534 {
9535         struct trace_event_file *file;
9536         int cpu;
9537
9538         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9539                         tr, &show_traces_fops);
9540
9541         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9542                         tr, &set_tracer_fops);
9543
9544         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9545                           tr, &tracing_cpumask_fops);
9546
9547         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9548                           tr, &tracing_iter_fops);
9549
9550         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9551                           tr, &tracing_fops);
9552
9553         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9554                           tr, &tracing_pipe_fops);
9555
9556         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9557                           tr, &tracing_entries_fops);
9558
9559         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9560                           tr, &tracing_total_entries_fops);
9561
9562         trace_create_file("free_buffer", 0200, d_tracer,
9563                           tr, &tracing_free_buffer_fops);
9564
9565         trace_create_file("trace_marker", 0220, d_tracer,
9566                           tr, &tracing_mark_fops);
9567
9568         file = __find_event_file(tr, "ftrace", "print");
9569         if (file && file->dir)
9570                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9571                                   file, &event_trigger_fops);
9572         tr->trace_marker_file = file;
9573
9574         trace_create_file("trace_marker_raw", 0220, d_tracer,
9575                           tr, &tracing_mark_raw_fops);
9576
9577         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9578                           &trace_clock_fops);
9579
9580         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9581                           tr, &rb_simple_fops);
9582
9583         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9584                           &trace_time_stamp_mode_fops);
9585
9586         tr->buffer_percent = 50;
9587
9588         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9589                         tr, &buffer_percent_fops);
9590
9591         create_trace_options_dir(tr);
9592
9593         trace_create_maxlat_file(tr, d_tracer);
9594
9595         if (ftrace_create_function_files(tr, d_tracer))
9596                 MEM_FAIL(1, "Could not allocate function filter files");
9597
9598 #ifdef CONFIG_TRACER_SNAPSHOT
9599         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9600                           tr, &snapshot_fops);
9601 #endif
9602
9603         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9604                           tr, &tracing_err_log_fops);
9605
9606         for_each_tracing_cpu(cpu)
9607                 tracing_init_tracefs_percpu(tr, cpu);
9608
9609         ftrace_init_tracefs(tr, d_tracer);
9610 }
9611
9612 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9613 {
9614         struct vfsmount *mnt;
9615         struct file_system_type *type;
9616
9617         /*
9618          * To maintain backward compatibility for tools that mount
9619          * debugfs to get to the tracing facility, tracefs is automatically
9620          * mounted to the debugfs/tracing directory.
9621          */
9622         type = get_fs_type("tracefs");
9623         if (!type)
9624                 return NULL;
9625         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9626         put_filesystem(type);
9627         if (IS_ERR(mnt))
9628                 return NULL;
9629         mntget(mnt);
9630
9631         return mnt;
9632 }
9633
9634 /**
9635  * tracing_init_dentry - initialize top level trace array
9636  *
9637  * This is called when creating files or directories in the tracing
9638  * directory. It is called via fs_initcall() by any of the boot up code
9639  * and returns 0 on success or a negative error code on failure.
9640  */
9641 int tracing_init_dentry(void)
9642 {
9643         struct trace_array *tr = &global_trace;
9644
9645         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9646                 pr_warn("Tracing disabled due to lockdown\n");
9647                 return -EPERM;
9648         }
9649
9650         /* The top level trace array uses NULL as parent */
9651         if (tr->dir)
9652                 return 0;
9653
9654         if (WARN_ON(!tracefs_initialized()))
9655                 return -ENODEV;
9656
9657         /*
9658          * As there may still be users that expect the tracing
9659          * files to exist in debugfs/tracing, we must automount
9660          * the tracefs file system there, so older tools still
9661          * work with the newer kernel.
9662          */
9663         tr->dir = debugfs_create_automount("tracing", NULL,
9664                                            trace_automount, NULL);
9665
9666         return 0;
9667 }
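
/*
 * Editor's note (usage sketch, not from this file): with the automount in
 * place, both of the following are expected to reach the same tracefs
 * files, the second path by triggering the automount on access:
 *
 *   ls /sys/kernel/tracing/
 *   ls /sys/kernel/debug/tracing/
 */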
9668
9669 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9670 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9671
9672 static struct workqueue_struct *eval_map_wq __initdata;
9673 static struct work_struct eval_map_work __initdata;
9674 static struct work_struct tracerfs_init_work __initdata;
9675
9676 static void __init eval_map_work_func(struct work_struct *work)
9677 {
9678         int len;
9679
9680         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9681         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9682 }
9683
9684 static int __init trace_eval_init(void)
9685 {
9686         INIT_WORK(&eval_map_work, eval_map_work_func);
9687
9688         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9689         if (!eval_map_wq) {
9690                 pr_err("Unable to allocate eval_map_wq\n");
9691                 /* Fall back to doing the work synchronously */
9692                 eval_map_work_func(&eval_map_work);
9693                 return -ENOMEM;
9694         }
9695
9696         queue_work(eval_map_wq, &eval_map_work);
9697         return 0;
9698 }
9699
9700 subsys_initcall(trace_eval_init);
9701
9702 static int __init trace_eval_sync(void)
9703 {
9704         /* Make sure the eval map updates are finished */
9705         if (eval_map_wq)
9706                 destroy_workqueue(eval_map_wq);
9707         return 0;
9708 }
9709
9710 late_initcall_sync(trace_eval_sync);
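
/*
 * Editor's note (assumption based on the code above): trace_eval_init()
 * queues the potentially large eval map insertion on eval_map_wq so it can
 * run alongside the rest of boot, and trace_eval_sync() destroys that
 * workqueue at late init, which waits for any still-queued map updates to
 * finish before they may be needed.
 */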
9711
9712
9713 #ifdef CONFIG_MODULES
9714 static void trace_module_add_evals(struct module *mod)
9715 {
9716         if (!mod->num_trace_evals)
9717                 return;
9718
9719         /*
9720          * Modules with bad taint do not have events created, so do
9721          * not bother with their enums either.
9722          */
9723         if (trace_module_has_bad_taint(mod))
9724                 return;
9725
9726         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9727 }
9728
9729 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9730 static void trace_module_remove_evals(struct module *mod)
9731 {
9732         union trace_eval_map_item *map;
9733         union trace_eval_map_item **last = &trace_eval_maps;
9734
9735         if (!mod->num_trace_evals)
9736                 return;
9737
9738         mutex_lock(&trace_eval_mutex);
9739
9740         map = trace_eval_maps;
9741
9742         while (map) {
9743                 if (map->head.mod == mod)
9744                         break;
9745                 map = trace_eval_jmp_to_tail(map);
9746                 last = &map->tail.next;
9747                 map = map->tail.next;
9748         }
9749         if (!map)
9750                 goto out;
9751
9752         *last = trace_eval_jmp_to_tail(map)->tail.next;
9753         kfree(map);
9754  out:
9755         mutex_unlock(&trace_eval_mutex);
9756 }
9757 #else
9758 static inline void trace_module_remove_evals(struct module *mod) { }
9759 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9760
9761 static int trace_module_notify(struct notifier_block *self,
9762                                unsigned long val, void *data)
9763 {
9764         struct module *mod = data;
9765
9766         switch (val) {
9767         case MODULE_STATE_COMING:
9768                 trace_module_add_evals(mod);
9769                 break;
9770         case MODULE_STATE_GOING:
9771                 trace_module_remove_evals(mod);
9772                 break;
9773         }
9774
9775         return NOTIFY_OK;
9776 }
9777
9778 static struct notifier_block trace_module_nb = {
9779         .notifier_call = trace_module_notify,
9780         .priority = 0,
9781 };
9782 #endif /* CONFIG_MODULES */
9783
9784 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9785 {
9786
9787         event_trace_init();
9788
9789         init_tracer_tracefs(&global_trace, NULL);
9790         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9791
9792         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9793                         &global_trace, &tracing_thresh_fops);
9794
9795         trace_create_file("README", TRACE_MODE_READ, NULL,
9796                         NULL, &tracing_readme_fops);
9797
9798         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9799                         NULL, &tracing_saved_cmdlines_fops);
9800
9801         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9802                           NULL, &tracing_saved_cmdlines_size_fops);
9803
9804         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9805                         NULL, &tracing_saved_tgids_fops);
9806
9807         trace_create_eval_file(NULL);
9808
9809 #ifdef CONFIG_MODULES
9810         register_module_notifier(&trace_module_nb);
9811 #endif
9812
9813 #ifdef CONFIG_DYNAMIC_FTRACE
9814         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9815                         NULL, &tracing_dyn_info_fops);
9816 #endif
9817
9818         create_trace_instances(NULL);
9819
9820         update_tracer_options(&global_trace);
9821 }
9822
9823 static __init int tracer_init_tracefs(void)
9824 {
9825         int ret;
9826
9827         trace_access_lock_init();
9828
9829         ret = tracing_init_dentry();
9830         if (ret)
9831                 return 0;
9832
9833         if (eval_map_wq) {
9834                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9835                 queue_work(eval_map_wq, &tracerfs_init_work);
9836         } else {
9837                 tracer_init_tracefs_work_func(NULL);
9838         }
9839
9840         rv_init_interface();
9841
9842         return 0;
9843 }
9844
9845 fs_initcall(tracer_init_tracefs);
9846
9847 static int trace_panic_handler(struct notifier_block *this,
9848                                unsigned long event, void *unused)
9849 {
9850         if (ftrace_dump_on_oops)
9851                 ftrace_dump(ftrace_dump_on_oops);
9852         return NOTIFY_OK;
9853 }
9854
9855 static struct notifier_block trace_panic_notifier = {
9856         .notifier_call  = trace_panic_handler,
9857         .next           = NULL,
9858         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9859 };
9860
9861 static int trace_die_handler(struct notifier_block *self,
9862                              unsigned long val,
9863                              void *data)
9864 {
9865         switch (val) {
9866         case DIE_OOPS:
9867                 if (ftrace_dump_on_oops)
9868                         ftrace_dump(ftrace_dump_on_oops);
9869                 break;
9870         default:
9871                 break;
9872         }
9873         return NOTIFY_OK;
9874 }
9875
9876 static struct notifier_block trace_die_notifier = {
9877         .notifier_call = trace_die_handler,
9878         .priority = 200
9879 };
9880
9881 /*
9882  * printk is set to a max of 1024; we really don't need it that big.
9883  * Nothing should be printing 1000 characters anyway.
9884  */
9885 #define TRACE_MAX_PRINT         1000
9886
9887 /*
9888  * Define here KERN_TRACE so that we have one place to modify
9889  * it if we decide to change what log level the ftrace dump
9890  * should be at.
9891  */
9892 #define KERN_TRACE              KERN_EMERG
9893
9894 void
9895 trace_printk_seq(struct trace_seq *s)
9896 {
9897         /* Probably should print a warning here. */
9898         if (s->seq.len >= TRACE_MAX_PRINT)
9899                 s->seq.len = TRACE_MAX_PRINT;
9900
9901         /*
9902          * More paranoid code. Although the buffer size is set to
9903          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9904          * an extra layer of protection.
9905          */
9906         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9907                 s->seq.len = s->seq.size - 1;
9908
9909         /* Should be zero terminated, but we are paranoid. */
9910         s->buffer[s->seq.len] = 0;
9911
9912         printk(KERN_TRACE "%s", s->buffer);
9913
9914         trace_seq_init(s);
9915 }
9916
9917 void trace_init_global_iter(struct trace_iterator *iter)
9918 {
9919         iter->tr = &global_trace;
9920         iter->trace = iter->tr->current_trace;
9921         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9922         iter->array_buffer = &global_trace.array_buffer;
9923
9924         if (iter->trace && iter->trace->open)
9925                 iter->trace->open(iter);
9926
9927         /* Annotate start of buffers if we had overruns */
9928         if (ring_buffer_overruns(iter->array_buffer->buffer))
9929                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9930
9931         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9932         if (trace_clocks[iter->tr->clock_id].in_ns)
9933                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9934
9935         /* Cannot use kmalloc for iter.temp and iter.fmt */
9936         iter->temp = static_temp_buf;
9937         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9938         iter->fmt = static_fmt_buf;
9939         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9940 }
9941
9942 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9943 {
9944         /* use static because iter can be a bit big for the stack */
9945         static struct trace_iterator iter;
9946         static atomic_t dump_running;
9947         struct trace_array *tr = &global_trace;
9948         unsigned int old_userobj;
9949         unsigned long flags;
9950         int cnt = 0, cpu;
9951
9952         /* Only allow one dump user at a time. */
9953         if (atomic_inc_return(&dump_running) != 1) {
9954                 atomic_dec(&dump_running);
9955                 return;
9956         }
9957
9958         /*
9959          * Always turn off tracing when we dump.
9960          * We don't need to show trace output of what happens
9961          * between multiple crashes.
9962          *
9963          * If the user does a sysrq-z, then they can re-enable
9964          * tracing with echo 1 > tracing_on.
9965          */
9966         tracing_off();
9967
9968         local_irq_save(flags);
9969
9970         /* Simulate the iterator */
9971         trace_init_global_iter(&iter);
9972
9973         for_each_tracing_cpu(cpu) {
9974                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9975         }
9976
9977         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9978
9979         /* don't look at user memory in panic mode */
9980         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9981
9982         switch (oops_dump_mode) {
9983         case DUMP_ALL:
9984                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9985                 break;
9986         case DUMP_ORIG:
9987                 iter.cpu_file = raw_smp_processor_id();
9988                 break;
9989         case DUMP_NONE:
9990                 goto out_enable;
9991         default:
9992                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9993                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9994         }
9995
9996         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9997
9998         /* Did function tracer already get disabled? */
9999         if (ftrace_is_dead()) {
10000                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10001                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10002         }
10003
10004         /*
10005          * We need to stop all tracing on all CPUs to read
10006          * the next buffer. This is a bit expensive, but is
10007          * not done often. We read out all that we can,
10008          * and then release the locks again.
10009          */
10010
10011         while (!trace_empty(&iter)) {
10012
10013                 if (!cnt)
10014                         printk(KERN_TRACE "---------------------------------\n");
10015
10016                 cnt++;
10017
10018                 trace_iterator_reset(&iter);
10019                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10020
10021                 if (trace_find_next_entry_inc(&iter) != NULL) {
10022                         int ret;
10023
10024                         ret = print_trace_line(&iter);
10025                         if (ret != TRACE_TYPE_NO_CONSUME)
10026                                 trace_consume(&iter);
10027                 }
10028                 touch_nmi_watchdog();
10029
10030                 trace_printk_seq(&iter.seq);
10031         }
10032
10033         if (!cnt)
10034                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10035         else
10036                 printk(KERN_TRACE "---------------------------------\n");
10037
10038  out_enable:
10039         tr->trace_flags |= old_userobj;
10040
10041         for_each_tracing_cpu(cpu) {
10042                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10043         }
10044         atomic_dec(&dump_running);
10045         local_irq_restore(flags);
10046 }
10047 EXPORT_SYMBOL_GPL(ftrace_dump);
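
/*
 * Editor's usage sketch (assumption, not part of this file): ftrace_dump()
 * may be called from a debugging or error path to spill the ring buffer to
 * the console. The sample_watchdog_expired() hook is made up; the block is
 * guarded out so it is not built.
 */
#if 0
static void sample_watchdog_expired(void)
{
        /* Dump the buffers of all CPUs; DUMP_ORIG would dump only this one. */
        ftrace_dump(DUMP_ALL);
}
#endif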
10048
10049 #define WRITE_BUFSIZE  4096
10050
10051 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10052                                 size_t count, loff_t *ppos,
10053                                 int (*createfn)(const char *))
10054 {
10055         char *kbuf, *buf, *tmp;
10056         int ret = 0;
10057         size_t done = 0;
10058         size_t size;
10059
10060         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10061         if (!kbuf)
10062                 return -ENOMEM;
10063
10064         while (done < count) {
10065                 size = count - done;
10066
10067                 if (size >= WRITE_BUFSIZE)
10068                         size = WRITE_BUFSIZE - 1;
10069
10070                 if (copy_from_user(kbuf, buffer + done, size)) {
10071                         ret = -EFAULT;
10072                         goto out;
10073                 }
10074                 kbuf[size] = '\0';
10075                 buf = kbuf;
10076                 do {
10077                         tmp = strchr(buf, '\n');
10078                         if (tmp) {
10079                                 *tmp = '\0';
10080                                 size = tmp - buf + 1;
10081                         } else {
10082                                 size = strlen(buf);
10083                                 if (done + size < count) {
10084                                         if (buf != kbuf)
10085                                                 break;
10086                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10087                                         pr_warn("Line length is too long: Should be less than %d\n",
10088                                                 WRITE_BUFSIZE - 2);
10089                                         ret = -EINVAL;
10090                                         goto out;
10091                                 }
10092                         }
10093                         done += size;
10094
10095                         /* Remove comments */
10096                         tmp = strchr(buf, '#');
10097
10098                         if (tmp)
10099                                 *tmp = '\0';
10100
10101                         ret = createfn(buf);
10102                         if (ret)
10103                                 goto out;
10104                         buf += size;
10105
10106                 } while (done < count);
10107         }
10108         ret = done;
10109
10110 out:
10111         kfree(kbuf);
10112
10113         return ret;
10114 }
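
/*
 * Editor's usage sketch (assumption, not part of this file): a caller's
 * write handler hands user data to trace_parse_run_command(), which splits
 * it into '\n'-separated, '#'-comment-stripped lines and passes each line
 * to the callback. The sample_*() names are made up; the block is guarded
 * out so it is not built.
 */
#if 0
static int sample_create_cmd(const char *raw_command)
{
        /* Called once per parsed line, with comments already removed. */
        pr_info("parsed command: %s\n", raw_command);
        return 0;
}

static ssize_t sample_write(struct file *file, const char __user *buffer,
                            size_t count, loff_t *ppos)
{
        return trace_parse_run_command(file, buffer, count, ppos,
                                       sample_create_cmd);
}
#endif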
10115
10116 __init static int tracer_alloc_buffers(void)
10117 {
10118         int ring_buf_size;
10119         int ret = -ENOMEM;
10120
10121
10122         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10123                 pr_warn("Tracing disabled due to lockdown\n");
10124                 return -EPERM;
10125         }
10126
10127         /*
10128          * Make sure we don't accidentally add more trace options
10129          * than we have bits for.
10130          */
10131         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10132
10133         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10134                 goto out;
10135
10136         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10137                 goto out_free_buffer_mask;
10138
10139         /* Only allocate trace_printk buffers if a trace_printk exists */
10140         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10141                 /* Must be called before global_trace.buffer is allocated */
10142                 trace_printk_init_buffers();
10143
10144         /* To save memory, keep the ring buffer size to its minimum */
10145         if (ring_buffer_expanded)
10146                 ring_buf_size = trace_buf_size;
10147         else
10148                 ring_buf_size = 1;
10149
10150         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10151         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10152
10153         raw_spin_lock_init(&global_trace.start_lock);
10154
10155         /*
10156          * The prepare callback allocates some memory for the ring buffer. We
10157          * don't free the buffer if the CPU goes down. If we were to free
10158          * the buffer, then the user would lose any trace that was in the
10159          * buffer. The memory will be removed once the "instance" is removed.
10160          */
10161         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10162                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10163                                       NULL);
10164         if (ret < 0)
10165                 goto out_free_cpumask;
10166         /* Used for event triggers */
10167         ret = -ENOMEM;
10168         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10169         if (!temp_buffer)
10170                 goto out_rm_hp_state;
10171
10172         if (trace_create_savedcmd() < 0)
10173                 goto out_free_temp_buffer;
10174
10175         /* TODO: make the number of buffers hot pluggable with CPUs */
10176         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10177                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10178                 goto out_free_savedcmd;
10179         }
10180
10181         if (global_trace.buffer_disabled)
10182                 tracing_off();
10183
10184         if (trace_boot_clock) {
10185                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10186                 if (ret < 0)
10187                         pr_warn("Trace clock %s not defined, going back to default\n",
10188                                 trace_boot_clock);
10189         }
10190
10191         /*
10192          * register_tracer() might reference current_trace, so it
10193          * needs to be set before we register anything. This is
10194          * just a bootstrap of current_trace anyway.
10195          */
10196         global_trace.current_trace = &nop_trace;
10197
10198         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10199
10200         ftrace_init_global_array_ops(&global_trace);
10201
10202         init_trace_flags_index(&global_trace);
10203
10204         register_tracer(&nop_trace);
10205
10206         /* Function tracing may start here (via kernel command line) */
10207         init_function_trace();
10208
10209         /* All seems OK, enable tracing */
10210         tracing_disabled = 0;
10211
10212         atomic_notifier_chain_register(&panic_notifier_list,
10213                                        &trace_panic_notifier);
10214
10215         register_die_notifier(&trace_die_notifier);
10216
10217         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10218
10219         INIT_LIST_HEAD(&global_trace.systems);
10220         INIT_LIST_HEAD(&global_trace.events);
10221         INIT_LIST_HEAD(&global_trace.hist_vars);
10222         INIT_LIST_HEAD(&global_trace.err_log);
10223         list_add(&global_trace.list, &ftrace_trace_arrays);
10224
10225         apply_trace_boot_options();
10226
10227         register_snapshot_cmd();
10228
10229         test_can_verify();
10230
10231         return 0;
10232
10233 out_free_savedcmd:
10234         free_saved_cmdlines_buffer(savedcmd);
10235 out_free_temp_buffer:
10236         ring_buffer_free(temp_buffer);
10237 out_rm_hp_state:
10238         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10239 out_free_cpumask:
10240         free_cpumask_var(global_trace.tracing_cpumask);
10241 out_free_buffer_mask:
10242         free_cpumask_var(tracing_buffer_mask);
10243 out:
10244         return ret;
10245 }
10246
10247 void __init ftrace_boot_snapshot(void)
10248 {
10249         if (snapshot_at_boot) {
10250                 tracing_snapshot();
10251                 internal_trace_puts("** Boot snapshot taken **\n");
10252         }
10253 }
10254
10255 void __init early_trace_init(void)
10256 {
10257         if (tracepoint_printk) {
10258                 tracepoint_print_iter =
10259                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10260                 if (MEM_FAIL(!tracepoint_print_iter,
10261                              "Failed to allocate trace iterator\n"))
10262                         tracepoint_printk = 0;
10263                 else
10264                         static_key_enable(&tracepoint_printk_key.key);
10265         }
10266         tracer_alloc_buffers();
10267 }
10268
10269 void __init trace_init(void)
10270 {
10271         trace_event_init();
10272 }
10273
10274 __init static void clear_boot_tracer(void)
10275 {
10276         /*
10277          * The default bootup tracer name points into an init section
10278          * that is freed after boot. This function is called at late
10279          * init; if the boot tracer was not found and registered by then,
10280          * clear it out to prevent later registrations from accessing
10281          * the buffer that is about to be freed.
10282          */
10283         if (!default_bootup_tracer)
10284                 return;
10285
10286         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10287                default_bootup_tracer);
10288         default_bootup_tracer = NULL;
10289 }
10290
10291 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10292 __init static void tracing_set_default_clock(void)
10293 {
10294         /* sched_clock_stable() is determined in late_initcall */
10295         if (!trace_boot_clock && !sched_clock_stable()) {
10296                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10297                         pr_warn("Can not set tracing clock due to lockdown\n");
10298                         return;
10299                 }
10300
10301                 printk(KERN_WARNING
10302                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10303                        "If you want to keep using the local clock, then add:\n"
10304                        "  \"trace_clock=local\"\n"
10305                        "on the kernel command line\n");
10306                 tracing_set_clock(&global_trace, "global");
10307         }
10308 }
10309 #else
10310 static inline void tracing_set_default_clock(void) { }
10311 #endif
10312
10313 __init static int late_trace_init(void)
10314 {
10315         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10316                 static_key_disable(&tracepoint_printk_key.key);
10317                 tracepoint_printk = 0;
10318         }
10319
10320         tracing_set_default_clock();
10321         clear_boot_tracer();
10322         return 0;
10323 }
10324
10325 late_initcall_sync(late_trace_init);