ftrace: Added ftrace_func_mapper for function probe triggers
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b9691ee8f6c182cfee1af7308555b9291f3730bd..ac47d1845fdb4e42ef0c2fb419bf508a5048f39b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -36,6 +36,7 @@
 
 #include <trace/events/sched.h>
 
+#include <asm/sections.h>
 #include <asm/setup.h>
 
 #include "trace_output.h"
@@ -1100,7 +1101,6 @@ static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
 struct ftrace_func_probe {
        struct hlist_node       node;
        struct ftrace_probe_ops *ops;
-       unsigned long           flags;
        unsigned long           ip;
        void                    *data;
        struct list_head        free_list;
@@ -2807,18 +2807,28 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
         * callers are done before leaving this function.
         * The same goes for freeing the per_cpu data of the per_cpu
         * ops.
-        *
-        * Again, normal synchronize_sched() is not good enough.
-        * We need to do a hard force of sched synchronization.
-        * This is because we use preempt_disable() to do RCU, but
-        * the function tracers can be called where RCU is not watching
-        * (like before user_exit()). We can not rely on the RCU
-        * infrastructure to do the synchronization, thus we must do it
-        * ourselves.
         */
        if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) {
+               /*
+                * We need to do a hard force of sched synchronization.
+                * This is because we use preempt_disable() to do RCU, but
+                * the function tracers can be called where RCU is not watching
+                * (like before user_exit()). We can not rely on the RCU
+                * infrastructure to do the synchronization, thus we must do it
+                * ourselves.
+                */
                schedule_on_each_cpu(ftrace_sync);
 
+               /*
+                * When the kernel is preemptive, tasks can be preempted
+                * while on a ftrace trampoline. Just scheduling a task on
+                * a CPU is not good enough to flush them. Calling
+                * synchronize_rcu_tasks() will wait for those tasks to
+                * execute and either schedule voluntarily or enter user space.
+                */
+               if (IS_ENABLED(CONFIG_PREEMPT))
+                       synchronize_rcu_tasks();
+
                arch_ftrace_trampoline_free(ops);
 
                if (ops->flags & FTRACE_OPS_FL_PER_CPU)
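
For context, schedule_on_each_cpu(ftrace_sync) only provides the "hard force of sched synchronization" described above because ftrace_sync() is an empty stub: queuing it as work on every CPU drags each CPU through the scheduler, flushing any tracer still running with preemption disabled. A sketch of that stub (it lives elsewhere in this file and is not part of this hunk):

static void ftrace_sync(struct work_struct *work)
{
        /*
         * Intentionally empty: what matters is not the work itself,
         * but that scheduling it forces every CPU through the
         * scheduler, acting as a hard synchronize_sched().
         */
}
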
@@ -3154,23 +3164,12 @@ t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
 }
 
 static void *
-t_next(struct seq_file *m, void *v, loff_t *pos)
+t_func_next(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
-       struct ftrace_ops *ops = iter->ops;
        struct dyn_ftrace *rec = NULL;
 
-       if (unlikely(ftrace_disabled))
-               return NULL;
-
-       if (iter->flags & FTRACE_ITER_HASH)
-               return t_hash_next(m, pos);
-
        (*pos)++;
-       iter->pos = iter->func_pos = *pos;
-
-       if (iter->flags & FTRACE_ITER_PRINTALL)
-               return t_hash_start(m, pos);
 
  retry:
        if (iter->idx >= iter->pg->index) {
@@ -3181,11 +3180,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                }
        } else {
                rec = &iter->pg->records[iter->idx++];
-               if (((iter->flags & FTRACE_ITER_FILTER) &&
-                    !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) ||
-
-                   ((iter->flags & FTRACE_ITER_NOTRACE) &&
-                    !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) ||
+               if (((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) &&
+                    !ftrace_lookup_ip(iter->hash, rec->ip)) ||
 
                    ((iter->flags & FTRACE_ITER_ENABLED) &&
                     !(rec->flags & FTRACE_FL_ENABLED))) {
@@ -3196,13 +3192,41 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
        }
 
        if (!rec)
-               return t_hash_start(m, pos);
+               return NULL;
 
+       iter->pos = iter->func_pos = *pos;
        iter->func = rec;
 
        return iter;
 }
 
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct ftrace_iterator *iter = m->private;
+       loff_t l = *pos; /* t_hash_start() must use original pos */
+       void *ret;
+
+       if (unlikely(ftrace_disabled))
+               return NULL;
+
+       if (iter->flags & FTRACE_ITER_HASH)
+               return t_hash_next(m, pos);
+
+       if (iter->flags & FTRACE_ITER_PRINTALL) {
+               /* next must increment pos, and t_hash_start does not */
+               (*pos)++;
+               return t_hash_start(m, &l);
+       }
+
+       ret = t_func_next(m, pos);
+
+       if (!ret)
+               return t_hash_start(m, &l);
+
+       return ret;
+}
+
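
For readers following the refactor: t_start(), t_next(), t_stop() and t_show() are the seq_file callbacks behind the tracefs function-list files, and splitting out t_func_next() leaves t_next() as a thin dispatcher between the hash and function iterators. Their wiring (existing code further down in this file, shown here for orientation only) is roughly:

static const struct seq_operations show_ftrace_seq_ops = {
        .start = t_start,
        .next  = t_next,
        .stop  = t_stop,
        .show  = t_show,
};
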
 static void reset_iter_read(struct ftrace_iterator *iter)
 {
        iter->pos = 0;
@@ -3213,7 +3237,6 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
-       struct ftrace_ops *ops = iter->ops;
        void *p = NULL;
        loff_t l;
 
@@ -3233,10 +3256,9 @@ static void *t_start(struct seq_file *m, loff_t *pos)
         * off, we can short cut and just print out that all
         * functions are enabled.
         */
-       if ((iter->flags & FTRACE_ITER_FILTER &&
-            ftrace_hash_empty(ops->func_hash->filter_hash)) ||
-           (iter->flags & FTRACE_ITER_NOTRACE &&
-            ftrace_hash_empty(ops->func_hash->notrace_hash))) {
+       if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) &&
+           ftrace_hash_empty(iter->hash)) {
+               iter->func_pos = 1; /* Account for the message */
                if (*pos > 0)
                        return t_hash_start(m, pos);
                iter->flags |= FTRACE_ITER_PRINTALL;
@@ -3256,7 +3278,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
        iter->pg = ftrace_pages_start;
        iter->idx = 0;
        for (l = 0; l <= *pos; ) {
-               p = t_next(m, p, &l);
+               p = t_func_next(m, &l);
                if (!p)
                        break;
        }
@@ -3355,12 +3377,13 @@ ftrace_avail_open(struct inode *inode, struct file *file)
                return -ENODEV;
 
        iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
-       if (iter) {
-               iter->pg = ftrace_pages_start;
-               iter->ops = &global_ops;
-       }
+       if (!iter)
+               return -ENOMEM;
+
+       iter->pg = ftrace_pages_start;
+       iter->ops = &global_ops;
 
-       return iter ? 0 : -ENOMEM;
+       return 0;
 }
 
 static int
@@ -3369,13 +3392,14 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
        struct ftrace_iterator *iter;
 
        iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
-       if (iter) {
-               iter->pg = ftrace_pages_start;
-               iter->flags = FTRACE_ITER_ENABLED;
-               iter->ops = &global_ops;
-       }
+       if (!iter)
+               return -ENOMEM;
+
+       iter->pg = ftrace_pages_start;
+       iter->flags = FTRACE_ITER_ENABLED;
+       iter->ops = &global_ops;
 
-       return iter ? 0 : -ENOMEM;
+       return 0;
 }
 
 /**
@@ -3440,7 +3464,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
                        ret = -ENOMEM;
                        goto out_unlock;
                }
-       }
+       } else
+               iter->hash = hash;
 
        if (file->f_mode & FMODE_READ) {
                iter->pg = ftrace_pages_start;
@@ -3714,7 +3739,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
        preempt_disable_notrace();
        hlist_for_each_entry_rcu_notrace(entry, hhd, node) {
                if (entry->ip == ip)
-                       entry->ops->func(ip, parent_ip, &entry->data);
+                       entry->ops->func(ip, parent_ip, entry->ops, &entry->data);
        }
        preempt_enable_notrace();
 }
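
Passing entry->ops here matches the updated probe callback prototype, which now receives the ftrace_probe_ops that registered it. Inferred from this call site alone (the authoritative prototype is in the ftrace_probe_ops definition), the callback looks like:

void (*func)(unsigned long ip, unsigned long parent_ip,
             struct ftrace_probe_ops *ops, void **data);
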
@@ -3755,23 +3780,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
        ftrace_probe_registered = 1;
 }
 
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
 {
        int i;
 
        if (!ftrace_probe_registered)
-               return;
+               return false;
 
        for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
                struct hlist_head *hhd = &ftrace_func_hash[i];
                if (hhd->first)
-                       return;
+                       return false;
        }
 
        /* no more funcs left */
        ftrace_shutdown(&trace_probe_ops, 0);
 
        ftrace_probe_registered = 0;
+       return true;
 }
 
 
@@ -3782,6 +3808,147 @@ static void ftrace_free_entry(struct ftrace_func_probe *entry)
        kfree(entry);
 }
 
+struct ftrace_func_map {
+       struct ftrace_func_entry        entry;
+       void                            *data;
+};
+
+struct ftrace_func_mapper {
+       struct ftrace_hash              hash;
+};
+
+/**
+ * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper
+ *
+ * Returns a ftrace_func_mapper descriptor that can be used to map ips to data.
+ */
+struct ftrace_func_mapper *allocate_ftrace_func_mapper(void)
+{
+       struct ftrace_hash *hash;
+
+       /*
+        * The mapper is simply a ftrace_hash, but since the entries
+        * in the hash are not ftrace_func_entry type, we define it
+        * as a separate structure.
+        */
+       hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+       return (struct ftrace_func_mapper *)hash;
+}
+
+/**
+ * ftrace_func_mapper_find_ip - Find some data mapped to an ip
+ * @mapper: The mapper that has the ip maps
+ * @ip: the instruction pointer to find the data for
+ *
+ * Returns the data mapped to @ip if found, otherwise NULL. The return
+ * is actually the address of the mapper data pointer. The address is
+ * returned for use cases where the data is no bigger than a long, and
+ * the user can use the data pointer as its data instead of having to
+ * allocate more memory for the reference.
+ */
+void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper,
+                                 unsigned long ip)
+{
+       struct ftrace_func_entry *entry;
+       struct ftrace_func_map *map;
+
+       entry = ftrace_lookup_ip(&mapper->hash, ip);
+       if (!entry)
+               return NULL;
+
+       map = (struct ftrace_func_map *)entry;
+       return &map->data;
+}
+
+/**
+ * ftrace_func_mapper_add_ip - Map some data to an ip
+ * @mapper: The mapper that has the ip maps
+ * @ip: The instruction pointer address to map @data to
+ * @data: The data to map to @ip
+ *
+ * Returns 0 on success, otherwise an error.
+ */
+int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
+                             unsigned long ip, void *data)
+{
+       struct ftrace_func_entry *entry;
+       struct ftrace_func_map *map;
+
+       entry = ftrace_lookup_ip(&mapper->hash, ip);
+       if (entry)
+               return -EBUSY;
+
+       map = kmalloc(sizeof(*map), GFP_KERNEL);
+       if (!map)
+               return -ENOMEM;
+
+       map->entry.ip = ip;
+       map->data = data;
+
+       __add_hash_entry(&mapper->hash, &map->entry);
+
+       return 0;
+}
+
+/**
+ * ftrace_func_mapper_remove_ip - Remove an ip from the mapping
+ * @mapper: The mapper that has the ip maps
+ * @ip: The instruction pointer address to remove the data from
+ *
+ * Returns the data if it is found, otherwise NULL.
+ * Note, if the data pointer is used as the data itself (see
+ * ftrace_func_mapper_find_ip()), then the return value may be meaningless
+ * if the data pointer was set to zero.
+ */
+void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper,
+                                  unsigned long ip)
+{
+       struct ftrace_func_entry *entry;
+       struct ftrace_func_map *map;
+       void *data;
+
+       entry = ftrace_lookup_ip(&mapper->hash, ip);
+       if (!entry)
+               return NULL;
+
+       map = (struct ftrace_func_map *)entry;
+       data = map->data;
+
+       remove_hash_entry(&mapper->hash, entry);
+       kfree(entry);
+
+       return data;
+}
+
+/**
+ * free_ftrace_func_mapper - free a mapping of ips and data
+ * @mapper: The mapper that has the ip maps
+ * @free_func: A function to be called on each data item.
+ *
+ * This is used to free the function mapper. The @free_func is optional
+ * and can be used if the data needs to be freed as well.
+ */
+void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
+                            ftrace_mapper_func free_func)
+{
+       struct ftrace_func_entry *entry;
+       struct ftrace_func_map *map;
+       struct hlist_head *hhd;
+       int size = 1 << mapper->hash.size_bits;
+       int i;
+
+       if (free_func && mapper->hash.count) {
+               for (i = 0; i < size; i++) {
+                       hhd = &mapper->hash.buckets[i];
+                       hlist_for_each_entry(entry, hhd, hlist) {
+                               map = (struct ftrace_func_map *)entry;
+                               free_func(map);
+                       }
+               }
+       }
+       free_ftrace_hash(&mapper->hash);
+}
+
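
Taken together, the mapper API above replaces open-coded per-ip data handling in function probe triggers. A hypothetical usage sketch (the function name is illustrative and not part of this patch), keeping a hit counter directly in the data pointer slot as ftrace_func_mapper_find_ip() allows:

static int example_mapper_usage(unsigned long ip)
{
        struct ftrace_func_mapper *mapper;
        void **data;
        int ret;

        mapper = allocate_ftrace_func_mapper();
        if (!mapper)
                return -ENOMEM;

        /* Map the ip to an initial count of zero */
        ret = ftrace_func_mapper_add_ip(mapper, ip, (void *)0UL);
        if (ret < 0)
                goto out_free;

        /* The address returned is the data slot itself */
        data = ftrace_func_mapper_find_ip(mapper, ip);
        if (data)
                (*(unsigned long *)data)++;

        ret = 0;
 out_free:
        /* No per-entry allocations were made, so no free_func is needed */
        free_ftrace_func_mapper(mapper, NULL);
        return ret;
}
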
 int
 register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                              void *data)
@@ -3901,6 +4068,7 @@ static void
 __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                                  void *data, int flags)
 {
+       struct ftrace_ops_hash old_hash_ops;
        struct ftrace_func_entry *rec_entry;
        struct ftrace_func_probe *entry;
        struct ftrace_func_probe *p;
@@ -3912,6 +4080,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
        struct hlist_node *tmp;
        char str[KSYM_SYMBOL_LEN];
        int i, ret;
+       bool disabled;
 
        if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
                func_g.search = NULL;
@@ -3930,6 +4099,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
        mutex_lock(&trace_probe_ops.func_hash->regex_lock);
 
+       old_hash_ops.filter_hash = old_hash;
+       /* Probes only have filters */
+       old_hash_ops.notrace_hash = NULL;
+
        hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
        if (!hash)
                /* Hmm, should report this somehow */
@@ -3967,12 +4140,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                }
        }
        mutex_lock(&ftrace_lock);
-       __disable_ftrace_function_probe();
+       disabled = __disable_ftrace_function_probe();
        /*
         * Remove after the disable is called. Otherwise, if the last
         * probe is removed, a null hash means *all enabled*.
         */
        ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+       /* still need to update the function call sites */
+       if (ftrace_enabled && !disabled)
+               ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+                                      &old_hash_ops);
        synchronize_sched();
        if (!ret)
                free_ftrace_hash_rcu(old_hash);
@@ -4524,6 +4702,9 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
                        free_ftrace_hash_rcu(old_hash);
                }
                mutex_unlock(&ftrace_lock);
+       } else {
+               /* For read only, the hash is the ops hash */
+               iter->hash = NULL;
        }
 
        mutex_unlock(&iter->ops->func_hash->regex_lock);
@@ -5262,6 +5443,50 @@ void ftrace_module_init(struct module *mod)
 }
 #endif /* CONFIG_MODULES */
 
+void __init ftrace_free_init_mem(void)
+{
+       unsigned long start = (unsigned long)(&__init_begin);
+       unsigned long end = (unsigned long)(&__init_end);
+       struct ftrace_page **last_pg = &ftrace_pages_start;
+       struct ftrace_page *pg;
+       struct dyn_ftrace *rec;
+       struct dyn_ftrace key;
+       int order;
+
+       key.ip = start;
+       key.flags = end;        /* overload flags, as it is unsigned long */
+
+       mutex_lock(&ftrace_lock);
+
+       for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) {
+               if (end < pg->records[0].ip ||
+                   start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+                       continue;
+ again:
+               rec = bsearch(&key, pg->records, pg->index,
+                             sizeof(struct dyn_ftrace),
+                             ftrace_cmp_recs);
+               if (!rec)
+                       continue;
+               pg->index--;
+               if (!pg->index) {
+                       *last_pg = pg->next;
+                       order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+                       free_pages((unsigned long)pg->records, order);
+                       kfree(pg);
+                       pg = container_of(last_pg, struct ftrace_page, next);
+                       if (!(*last_pg))
+                               ftrace_pages = pg;
+                       continue;
+               }
+               memmove(rec, rec + 1,
+                       (pg->index - (rec - pg->records)) * sizeof(*rec));
+               /* More than one function may be in this block */
+               goto again;
+       }
+       mutex_unlock(&ftrace_lock);
+}
+
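
The key.flags overload above works hand in hand with the comparator handed to bsearch(): ftrace_cmp_recs() treats key->ip as the start and key->flags as the end of the range being looked up. That comparator is defined earlier in this file, outside this diff; roughly:

static int ftrace_cmp_recs(const void *a, const void *b)
{
        const struct dyn_ftrace *key = a;
        const struct dyn_ftrace *rec = b;

        if (key->flags < rec->ip)       /* range ends below this record */
                return -1;
        if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)      /* range starts above it */
                return 1;
        return 0;
}
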
 void __init ftrace_init(void)
 {
        extern unsigned long __start_mcount_loc[];
@@ -5304,22 +5529,6 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 
 static void ftrace_update_trampoline(struct ftrace_ops *ops)
 {
-
-/*
- * Currently there's no safe way to free a trampoline when the kernel
- * is configured with PREEMPT. That is because a task could be preempted
- * when it jumped to the trampoline, it may be preempted for a long time
- * depending on the system load, and currently there's no way to know
- * when it will be off the trampoline. If the trampoline is freed
- * too early, when the task runs again, it will be executing on freed
- * memory and crash.
- */
-#ifdef CONFIG_PREEMPT
-       /* Currently, only non dynamic ops can have a trampoline */
-       if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
-               return;
-#endif
-
        arch_ftrace_update_trampoline(ops);
 }
 
@@ -5531,6 +5740,43 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
                       trace_ignore_this_task(pid_list, next));
 }
 
+static void
+ftrace_pid_follow_sched_process_fork(void *data,
+                                    struct task_struct *self,
+                                    struct task_struct *task)
+{
+       struct trace_pid_list *pid_list;
+       struct trace_array *tr = data;
+
+       pid_list = rcu_dereference_sched(tr->function_pids);
+       trace_filter_add_remove_task(pid_list, self, task);
+}
+
+static void
+ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task)
+{
+       struct trace_pid_list *pid_list;
+       struct trace_array *tr = data;
+
+       pid_list = rcu_dereference_sched(tr->function_pids);
+       trace_filter_add_remove_task(pid_list, NULL, task);
+}
+
+void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
+{
+       if (enable) {
+               register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork,
+                                                 tr);
+               register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit,
+                                                 tr);
+       } else {
+               unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork,
+                                                   tr);
+               unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit,
+                                                   tr);
+       }
+}
+
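
ftrace_pid_follow_fork() is the knob a tracer flips when set_ftrace_pid filtering should follow children across fork() and drop pids on exit. A hypothetical caller, e.g. an option handler in the function tracer (name and plumbing are illustrative only):

static int example_set_function_fork(struct trace_array *tr, bool set)
{
        /* Toggle the fork/exit tracepoints that keep the pid list in sync */
        ftrace_pid_follow_fork(tr, set);
        return 0;
}
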
 static void clear_ftrace_pids(struct trace_array *tr)
 {
        struct trace_pid_list *pid_list;