]>
Commit | Line | Data |
---|---|---|
8256e47c MD |
1 | /* |
2 | * Copyright (C) 2007 Mathieu Desnoyers | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | */ | |
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/marker.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
8256e47c MD |
27 | |
28 | extern struct marker __start___markers[]; | |
29 | extern struct marker __stop___markers[]; | |
30 | ||
fb40bd78 | 31 | /* Set to 1 to enable marker debug output */ |
ab883af5 | 32 | static const int marker_debug; |
fb40bd78 | 33 | |
8256e47c | 34 | /* |
314de8a9 | 35 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin |
fb40bd78 | 36 | * and module markers and the hash table. |
8256e47c MD |
37 | */ |
38 | static DEFINE_MUTEX(markers_mutex); | |
39 | ||
8256e47c MD |
40 | /* |
41 | * Marker hash table, containing the active markers. | |
42 | * Protected by module_mutex. | |
43 | */ | |
44 | #define MARKER_HASH_BITS 6 | |
45 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | |
46 | ||
fb40bd78 MD |
47 | /* |
48 | * Note about RCU : | |
49 | * It is used to make sure every handler has finished using its private data | |
50 | * between two consecutive operation (add or remove) on a given marker. It is | |
51 | * also used to delay the free of multiple probes array until a quiescent state | |
52 | * is reached. | |
53 | * marker entries modifications are protected by the markers_mutex. | |
54 | */ | |
8256e47c MD |
55 | struct marker_entry { |
56 | struct hlist_node hlist; | |
57 | char *format; | |
dc102a8f MD |
58 | /* Probe wrapper */ |
59 | void (*call)(const struct marker *mdata, void *call_private, ...); | |
fb40bd78 MD |
60 | struct marker_probe_closure single; |
61 | struct marker_probe_closure *multi; | |
8256e47c | 62 | int refcount; /* Number of times armed. 0 if disarmed. */ |
fb40bd78 MD |
63 | struct rcu_head rcu; |
64 | void *oldptr; | |
de4fc64f HH |
65 | unsigned char rcu_pending:1; |
66 | unsigned char ptype:1; | |
8256e47c MD |
67 | char name[0]; /* Contains name'\0'format'\0' */ |
68 | }; | |
69 | ||
70 | static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |
71 | ||
/**
 * __mark_empty_function - Empty probe callback
 * @probe_private: probe private data
 * @call_private: call site private data
 * @fmt: format string
 * @args: pointer to the variable argument list
 *
 * Empty callback provided as a probe to the markers. By providing this to a
 * disabled marker, we make sure the execution flow is always valid even
 * though the function pointer change and the marker enabling are two distinct
 * operations that modify the execution flow of preemptible code.
 */
void __mark_empty_function(void *probe_private, void *call_private,
	const char *fmt, va_list *args)
{
}
EXPORT_SYMBOL_GPL(__mark_empty_function);
89 | ||
fb40bd78 MD |
90 | /* |
91 | * marker_probe_cb Callback that prepares the variable argument list for probes. | |
92 | * @mdata: pointer of type struct marker | |
93 | * @call_private: caller site private data | |
fb40bd78 MD |
94 | * @...: Variable argument list. |
95 | * | |
96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | |
97 | * need to put a full smp_rmb() in this branch. This is why we do not use | |
98 | * rcu_dereference() for the pointer read. | |
99 | */ | |
dc102a8f | 100 | void marker_probe_cb(const struct marker *mdata, void *call_private, ...) |
fb40bd78 MD |
101 | { |
102 | va_list args; | |
103 | char ptype; | |
104 | ||
105 | /* | |
fd3c36f8 MD |
106 | * preempt_disable does two things : disabling preemption to make sure |
107 | * the teardown of the callbacks can be done correctly when they are in | |
108 | * modules and they insure RCU read coherency. | |
fb40bd78 MD |
109 | */ |
110 | preempt_disable(); | |
58336114 | 111 | ptype = mdata->ptype; |
fb40bd78 MD |
112 | if (likely(!ptype)) { |
113 | marker_probe_func *func; | |
114 | /* Must read the ptype before ptr. They are not data dependant, | |
115 | * so we put an explicit smp_rmb() here. */ | |
116 | smp_rmb(); | |
58336114 | 117 | func = mdata->single.func; |
fb40bd78 MD |
118 | /* Must read the ptr before private data. They are not data |
119 | * dependant, so we put an explicit smp_rmb() here. */ | |
120 | smp_rmb(); | |
dc102a8f MD |
121 | va_start(args, call_private); |
122 | func(mdata->single.probe_private, call_private, mdata->format, | |
123 | &args); | |
fb40bd78 MD |
124 | va_end(args); |
125 | } else { | |
126 | struct marker_probe_closure *multi; | |
127 | int i; | |
128 | /* | |
129 | * multi points to an array, therefore accessing the array | |
130 | * depends on reading multi. However, even in this case, | |
131 | * we must insure that the pointer is read _before_ the array | |
132 | * data. Same as rcu_dereference, but we need a full smp_rmb() | |
133 | * in the fast path, so put the explicit barrier here. | |
134 | */ | |
135 | smp_read_barrier_depends(); | |
58336114 | 136 | multi = mdata->multi; |
fb40bd78 | 137 | for (i = 0; multi[i].func; i++) { |
dc102a8f MD |
138 | va_start(args, call_private); |
139 | multi[i].func(multi[i].probe_private, call_private, | |
140 | mdata->format, &args); | |
fb40bd78 MD |
141 | va_end(args); |
142 | } | |
143 | } | |
144 | preempt_enable(); | |
145 | } | |
146 | EXPORT_SYMBOL_GPL(marker_probe_cb); | |
147 | ||
148 | /* | |
149 | * marker_probe_cb Callback that does not prepare the variable argument list. | |
150 | * @mdata: pointer of type struct marker | |
151 | * @call_private: caller site private data | |
fb40bd78 MD |
152 | * @...: Variable argument list. |
153 | * | |
154 | * Should be connected to markers "MARK_NOARGS". | |
155 | */ | |
dc102a8f | 156 | void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) |
fb40bd78 MD |
157 | { |
158 | va_list args; /* not initialized */ | |
159 | char ptype; | |
160 | ||
161 | preempt_disable(); | |
58336114 | 162 | ptype = mdata->ptype; |
fb40bd78 MD |
163 | if (likely(!ptype)) { |
164 | marker_probe_func *func; | |
165 | /* Must read the ptype before ptr. They are not data dependant, | |
166 | * so we put an explicit smp_rmb() here. */ | |
167 | smp_rmb(); | |
58336114 | 168 | func = mdata->single.func; |
fb40bd78 MD |
169 | /* Must read the ptr before private data. They are not data |
170 | * dependant, so we put an explicit smp_rmb() here. */ | |
171 | smp_rmb(); | |
dc102a8f MD |
172 | func(mdata->single.probe_private, call_private, mdata->format, |
173 | &args); | |
fb40bd78 MD |
174 | } else { |
175 | struct marker_probe_closure *multi; | |
176 | int i; | |
177 | /* | |
178 | * multi points to an array, therefore accessing the array | |
179 | * depends on reading multi. However, even in this case, | |
180 | * we must insure that the pointer is read _before_ the array | |
181 | * data. Same as rcu_dereference, but we need a full smp_rmb() | |
182 | * in the fast path, so put the explicit barrier here. | |
183 | */ | |
184 | smp_read_barrier_depends(); | |
58336114 | 185 | multi = mdata->multi; |
fb40bd78 | 186 | for (i = 0; multi[i].func; i++) |
dc102a8f MD |
187 | multi[i].func(multi[i].probe_private, call_private, |
188 | mdata->format, &args); | |
fb40bd78 MD |
189 | } |
190 | preempt_enable(); | |
191 | } | |
192 | EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); | |
193 | ||
194 | static void free_old_closure(struct rcu_head *head) | |
195 | { | |
196 | struct marker_entry *entry = container_of(head, | |
197 | struct marker_entry, rcu); | |
198 | kfree(entry->oldptr); | |
199 | /* Make sure we free the data before setting the pending flag to 0 */ | |
200 | smp_wmb(); | |
201 | entry->rcu_pending = 0; | |
202 | } | |
203 | ||
204 | static void debug_print_probes(struct marker_entry *entry) | |
205 | { | |
206 | int i; | |
207 | ||
208 | if (!marker_debug) | |
209 | return; | |
210 | ||
211 | if (!entry->ptype) { | |
212 | printk(KERN_DEBUG "Single probe : %p %p\n", | |
213 | entry->single.func, | |
214 | entry->single.probe_private); | |
215 | } else { | |
216 | for (i = 0; entry->multi[i].func; i++) | |
217 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | |
218 | entry->multi[i].func, | |
219 | entry->multi[i].probe_private); | |
220 | } | |
221 | } | |
222 | ||
223 | static struct marker_probe_closure * | |
224 | marker_entry_add_probe(struct marker_entry *entry, | |
225 | marker_probe_func *probe, void *probe_private) | |
226 | { | |
227 | int nr_probes = 0; | |
228 | struct marker_probe_closure *old, *new; | |
229 | ||
230 | WARN_ON(!probe); | |
231 | ||
232 | debug_print_probes(entry); | |
233 | old = entry->multi; | |
234 | if (!entry->ptype) { | |
235 | if (entry->single.func == probe && | |
236 | entry->single.probe_private == probe_private) | |
237 | return ERR_PTR(-EBUSY); | |
238 | if (entry->single.func == __mark_empty_function) { | |
239 | /* 0 -> 1 probes */ | |
240 | entry->single.func = probe; | |
241 | entry->single.probe_private = probe_private; | |
242 | entry->refcount = 1; | |
243 | entry->ptype = 0; | |
244 | debug_print_probes(entry); | |
245 | return NULL; | |
246 | } else { | |
247 | /* 1 -> 2 probes */ | |
248 | nr_probes = 1; | |
249 | old = NULL; | |
250 | } | |
251 | } else { | |
252 | /* (N -> N+1), (N != 0, 1) probes */ | |
253 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | |
254 | if (old[nr_probes].func == probe | |
255 | && old[nr_probes].probe_private | |
256 | == probe_private) | |
257 | return ERR_PTR(-EBUSY); | |
258 | } | |
259 | /* + 2 : one for new probe, one for NULL func */ | |
260 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | |
261 | GFP_KERNEL); | |
262 | if (new == NULL) | |
263 | return ERR_PTR(-ENOMEM); | |
264 | if (!old) | |
265 | new[0] = entry->single; | |
266 | else | |
267 | memcpy(new, old, | |
268 | nr_probes * sizeof(struct marker_probe_closure)); | |
269 | new[nr_probes].func = probe; | |
270 | new[nr_probes].probe_private = probe_private; | |
271 | entry->refcount = nr_probes + 1; | |
272 | entry->multi = new; | |
273 | entry->ptype = 1; | |
274 | debug_print_probes(entry); | |
275 | return old; | |
276 | } | |
277 | ||
278 | static struct marker_probe_closure * | |
279 | marker_entry_remove_probe(struct marker_entry *entry, | |
280 | marker_probe_func *probe, void *probe_private) | |
281 | { | |
282 | int nr_probes = 0, nr_del = 0, i; | |
283 | struct marker_probe_closure *old, *new; | |
284 | ||
285 | old = entry->multi; | |
286 | ||
287 | debug_print_probes(entry); | |
288 | if (!entry->ptype) { | |
289 | /* 0 -> N is an error */ | |
290 | WARN_ON(entry->single.func == __mark_empty_function); | |
291 | /* 1 -> 0 probes */ | |
292 | WARN_ON(probe && entry->single.func != probe); | |
293 | WARN_ON(entry->single.probe_private != probe_private); | |
294 | entry->single.func = __mark_empty_function; | |
295 | entry->refcount = 0; | |
296 | entry->ptype = 0; | |
297 | debug_print_probes(entry); | |
298 | return NULL; | |
299 | } else { | |
300 | /* (N -> M), (N > 1, M >= 0) probes */ | |
301 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | |
302 | if ((!probe || old[nr_probes].func == probe) | |
303 | && old[nr_probes].probe_private | |
304 | == probe_private) | |
305 | nr_del++; | |
306 | } | |
307 | } | |
308 | ||
309 | if (nr_probes - nr_del == 0) { | |
310 | /* N -> 0, (N > 1) */ | |
311 | entry->single.func = __mark_empty_function; | |
312 | entry->refcount = 0; | |
313 | entry->ptype = 0; | |
314 | } else if (nr_probes - nr_del == 1) { | |
315 | /* N -> 1, (N > 1) */ | |
316 | for (i = 0; old[i].func; i++) | |
317 | if ((probe && old[i].func != probe) || | |
318 | old[i].probe_private != probe_private) | |
319 | entry->single = old[i]; | |
320 | entry->refcount = 1; | |
321 | entry->ptype = 0; | |
322 | } else { | |
323 | int j = 0; | |
324 | /* N -> M, (N > 1, M > 1) */ | |
325 | /* + 1 for NULL */ | |
326 | new = kzalloc((nr_probes - nr_del + 1) | |
327 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | |
328 | if (new == NULL) | |
329 | return ERR_PTR(-ENOMEM); | |
330 | for (i = 0; old[i].func; i++) | |
331 | if ((probe && old[i].func != probe) || | |
332 | old[i].probe_private != probe_private) | |
333 | new[j++] = old[i]; | |
334 | entry->refcount = nr_probes - nr_del; | |
335 | entry->ptype = 1; | |
336 | entry->multi = new; | |
337 | } | |
338 | debug_print_probes(entry); | |
339 | return old; | |
340 | } | |
341 | ||
8256e47c MD |
342 | /* |
343 | * Get marker if the marker is present in the marker hash table. | |
344 | * Must be called with markers_mutex held. | |
345 | * Returns NULL if not present. | |
346 | */ | |
347 | static struct marker_entry *get_marker(const char *name) | |
348 | { | |
349 | struct hlist_head *head; | |
350 | struct hlist_node *node; | |
351 | struct marker_entry *e; | |
352 | u32 hash = jhash(name, strlen(name), 0); | |
353 | ||
354 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | |
355 | hlist_for_each_entry(e, node, head, hlist) { | |
356 | if (!strcmp(name, e->name)) | |
357 | return e; | |
358 | } | |
359 | return NULL; | |
360 | } | |
361 | ||
362 | /* | |
363 | * Add the marker to the marker hash table. Must be called with markers_mutex | |
364 | * held. | |
365 | */ | |
fb40bd78 | 366 | static struct marker_entry *add_marker(const char *name, const char *format) |
8256e47c MD |
367 | { |
368 | struct hlist_head *head; | |
369 | struct hlist_node *node; | |
370 | struct marker_entry *e; | |
371 | size_t name_len = strlen(name) + 1; | |
372 | size_t format_len = 0; | |
373 | u32 hash = jhash(name, name_len-1, 0); | |
374 | ||
375 | if (format) | |
376 | format_len = strlen(format) + 1; | |
377 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | |
378 | hlist_for_each_entry(e, node, head, hlist) { | |
379 | if (!strcmp(name, e->name)) { | |
380 | printk(KERN_NOTICE | |
fb40bd78 MD |
381 | "Marker %s busy\n", name); |
382 | return ERR_PTR(-EBUSY); /* Already there */ | |
8256e47c MD |
383 | } |
384 | } | |
385 | /* | |
386 | * Using kmalloc here to allocate a variable length element. Could | |
387 | * cause some memory fragmentation if overused. | |
388 | */ | |
389 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | |
390 | GFP_KERNEL); | |
391 | if (!e) | |
fb40bd78 | 392 | return ERR_PTR(-ENOMEM); |
8256e47c MD |
393 | memcpy(&e->name[0], name, name_len); |
394 | if (format) { | |
395 | e->format = &e->name[name_len]; | |
396 | memcpy(e->format, format, format_len); | |
fb40bd78 MD |
397 | if (strcmp(e->format, MARK_NOARGS) == 0) |
398 | e->call = marker_probe_cb_noarg; | |
399 | else | |
400 | e->call = marker_probe_cb; | |
8256e47c MD |
401 | trace_mark(core_marker_format, "name %s format %s", |
402 | e->name, e->format); | |
fb40bd78 | 403 | } else { |
8256e47c | 404 | e->format = NULL; |
fb40bd78 MD |
405 | e->call = marker_probe_cb; |
406 | } | |
407 | e->single.func = __mark_empty_function; | |
408 | e->single.probe_private = NULL; | |
409 | e->multi = NULL; | |
410 | e->ptype = 0; | |
8256e47c | 411 | e->refcount = 0; |
fb40bd78 | 412 | e->rcu_pending = 0; |
8256e47c | 413 | hlist_add_head(&e->hlist, head); |
fb40bd78 | 414 | return e; |
8256e47c MD |
415 | } |
416 | ||
417 | /* | |
418 | * Remove the marker from the marker hash table. Must be called with mutex_lock | |
419 | * held. | |
420 | */ | |
fb40bd78 | 421 | static int remove_marker(const char *name) |
8256e47c MD |
422 | { |
423 | struct hlist_head *head; | |
424 | struct hlist_node *node; | |
425 | struct marker_entry *e; | |
426 | int found = 0; | |
427 | size_t len = strlen(name) + 1; | |
8256e47c MD |
428 | u32 hash = jhash(name, len-1, 0); |
429 | ||
430 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | |
431 | hlist_for_each_entry(e, node, head, hlist) { | |
432 | if (!strcmp(name, e->name)) { | |
433 | found = 1; | |
434 | break; | |
435 | } | |
436 | } | |
fb40bd78 MD |
437 | if (!found) |
438 | return -ENOENT; | |
439 | if (e->single.func != __mark_empty_function) | |
440 | return -EBUSY; | |
441 | hlist_del(&e->hlist); | |
442 | /* Make sure the call_rcu has been executed */ | |
443 | if (e->rcu_pending) | |
444 | rcu_barrier(); | |
445 | kfree(e); | |
446 | return 0; | |
8256e47c MD |
447 | } |
448 | ||
449 | /* | |
450 | * Set the mark_entry format to the format found in the element. | |
451 | */ | |
452 | static int marker_set_format(struct marker_entry **entry, const char *format) | |
453 | { | |
454 | struct marker_entry *e; | |
455 | size_t name_len = strlen((*entry)->name) + 1; | |
456 | size_t format_len = strlen(format) + 1; | |
457 | ||
fb40bd78 | 458 | |
8256e47c MD |
459 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
460 | GFP_KERNEL); | |
461 | if (!e) | |
462 | return -ENOMEM; | |
463 | memcpy(&e->name[0], (*entry)->name, name_len); | |
464 | e->format = &e->name[name_len]; | |
465 | memcpy(e->format, format, format_len); | |
fb40bd78 MD |
466 | if (strcmp(e->format, MARK_NOARGS) == 0) |
467 | e->call = marker_probe_cb_noarg; | |
468 | else | |
469 | e->call = marker_probe_cb; | |
470 | e->single = (*entry)->single; | |
471 | e->multi = (*entry)->multi; | |
472 | e->ptype = (*entry)->ptype; | |
8256e47c | 473 | e->refcount = (*entry)->refcount; |
fb40bd78 | 474 | e->rcu_pending = 0; |
8256e47c MD |
475 | hlist_add_before(&e->hlist, &(*entry)->hlist); |
476 | hlist_del(&(*entry)->hlist); | |
fb40bd78 MD |
477 | /* Make sure the call_rcu has been executed */ |
478 | if ((*entry)->rcu_pending) | |
479 | rcu_barrier(); | |
8256e47c MD |
480 | kfree(*entry); |
481 | *entry = e; | |
482 | trace_mark(core_marker_format, "name %s format %s", | |
483 | e->name, e->format); | |
484 | return 0; | |
485 | } | |
486 | ||
487 | /* | |
488 | * Sets the probe callback corresponding to one marker. | |
489 | */ | |
fb40bd78 MD |
490 | static int set_marker(struct marker_entry **entry, struct marker *elem, |
491 | int active) | |
8256e47c MD |
492 | { |
493 | int ret; | |
494 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | |
495 | ||
496 | if ((*entry)->format) { | |
497 | if (strcmp((*entry)->format, elem->format) != 0) { | |
498 | printk(KERN_NOTICE | |
499 | "Format mismatch for probe %s " | |
500 | "(%s), marker (%s)\n", | |
501 | (*entry)->name, | |
502 | (*entry)->format, | |
503 | elem->format); | |
504 | return -EPERM; | |
505 | } | |
506 | } else { | |
507 | ret = marker_set_format(entry, elem->format); | |
508 | if (ret) | |
509 | return ret; | |
510 | } | |
fb40bd78 MD |
511 | |
512 | /* | |
513 | * probe_cb setup (statically known) is done here. It is | |
514 | * asynchronous with the rest of execution, therefore we only | |
515 | * pass from a "safe" callback (with argument) to an "unsafe" | |
516 | * callback (does not set arguments). | |
517 | */ | |
518 | elem->call = (*entry)->call; | |
519 | /* | |
520 | * Sanity check : | |
521 | * We only update the single probe private data when the ptr is | |
522 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | |
523 | */ | |
524 | WARN_ON(elem->single.func != __mark_empty_function | |
525 | && elem->single.probe_private | |
526 | != (*entry)->single.probe_private && | |
527 | !elem->ptype); | |
528 | elem->single.probe_private = (*entry)->single.probe_private; | |
529 | /* | |
530 | * Make sure the private data is valid when we update the | |
531 | * single probe ptr. | |
532 | */ | |
533 | smp_wmb(); | |
534 | elem->single.func = (*entry)->single.func; | |
535 | /* | |
536 | * We also make sure that the new probe callbacks array is consistent | |
537 | * before setting a pointer to it. | |
538 | */ | |
539 | rcu_assign_pointer(elem->multi, (*entry)->multi); | |
540 | /* | |
541 | * Update the function or multi probe array pointer before setting the | |
542 | * ptype. | |
543 | */ | |
544 | smp_wmb(); | |
545 | elem->ptype = (*entry)->ptype; | |
546 | elem->state = active; | |
547 | ||
8256e47c MD |
548 | return 0; |
549 | } | |
550 | ||
551 | /* | |
552 | * Disable a marker and its probe callback. | |
fd3c36f8 MD |
553 | * Note: only waiting an RCU period after setting elem->call to the empty |
554 | * function insures that the original callback is not used anymore. This insured | |
555 | * by preempt_disable around the call site. | |
8256e47c MD |
556 | */ |
557 | static void disable_marker(struct marker *elem) | |
558 | { | |
fb40bd78 | 559 | /* leave "call" as is. It is known statically. */ |
8256e47c | 560 | elem->state = 0; |
fb40bd78 MD |
561 | elem->single.func = __mark_empty_function; |
562 | /* Update the function before setting the ptype */ | |
563 | smp_wmb(); | |
564 | elem->ptype = 0; /* single probe */ | |
8256e47c MD |
565 | /* |
566 | * Leave the private data and id there, because removal is racy and | |
fd3c36f8 MD |
567 | * should be done only after an RCU period. These are never used until |
568 | * the next initialization anyway. | |
8256e47c MD |
569 | */ |
570 | } | |
571 | ||
572 | /** | |
573 | * marker_update_probe_range - Update a probe range | |
574 | * @begin: beginning of the range | |
575 | * @end: end of the range | |
8256e47c MD |
576 | * |
577 | * Updates the probe callback corresponding to a range of markers. | |
8256e47c MD |
578 | */ |
579 | void marker_update_probe_range(struct marker *begin, | |
fb40bd78 | 580 | struct marker *end) |
8256e47c MD |
581 | { |
582 | struct marker *iter; | |
583 | struct marker_entry *mark_entry; | |
584 | ||
314de8a9 | 585 | mutex_lock(&markers_mutex); |
8256e47c MD |
586 | for (iter = begin; iter < end; iter++) { |
587 | mark_entry = get_marker(iter->name); | |
fb40bd78 MD |
588 | if (mark_entry) { |
589 | set_marker(&mark_entry, iter, | |
590 | !!mark_entry->refcount); | |
8256e47c MD |
591 | /* |
592 | * ignore error, continue | |
593 | */ | |
8256e47c MD |
594 | } else { |
595 | disable_marker(iter); | |
596 | } | |
597 | } | |
314de8a9 | 598 | mutex_unlock(&markers_mutex); |
8256e47c MD |
599 | } |
600 | ||
601 | /* | |
602 | * Update probes, removing the faulty probes. | |
fb40bd78 MD |
603 | * |
604 | * Internal callback only changed before the first probe is connected to it. | |
605 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | |
606 | * transitions. All other transitions will leave the old private data valid. | |
607 | * This makes the non-atomicity of the callback/private data updates valid. | |
608 | * | |
609 | * "special case" updates : | |
610 | * 0 -> 1 callback | |
611 | * 1 -> 0 callback | |
612 | * 1 -> 2 callbacks | |
613 | * 2 -> 1 callbacks | |
614 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | |
615 | * Site effect : marker_set_format may delete the marker entry (creating a | |
616 | * replacement). | |
8256e47c | 617 | */ |
fb40bd78 | 618 | static void marker_update_probes(void) |
8256e47c | 619 | { |
8256e47c | 620 | /* Core kernel markers */ |
fb40bd78 | 621 | marker_update_probe_range(__start___markers, __stop___markers); |
8256e47c | 622 | /* Markers in modules. */ |
fb40bd78 | 623 | module_update_markers(); |
8256e47c MD |
624 | } |
625 | ||
626 | /** | |
627 | * marker_probe_register - Connect a probe to a marker | |
628 | * @name: marker name | |
629 | * @format: format string | |
630 | * @probe: probe handler | |
fb40bd78 | 631 | * @probe_private: probe private data |
8256e47c MD |
632 | * |
633 | * private data must be a valid allocated memory address, or NULL. | |
634 | * Returns 0 if ok, error value on error. | |
fb40bd78 | 635 | * The probe address must at least be aligned on the architecture pointer size. |
8256e47c MD |
636 | */ |
637 | int marker_probe_register(const char *name, const char *format, | |
fb40bd78 | 638 | marker_probe_func *probe, void *probe_private) |
8256e47c MD |
639 | { |
640 | struct marker_entry *entry; | |
314de8a9 | 641 | int ret = 0; |
fb40bd78 | 642 | struct marker_probe_closure *old; |
8256e47c MD |
643 | |
644 | mutex_lock(&markers_mutex); | |
645 | entry = get_marker(name); | |
fb40bd78 MD |
646 | if (!entry) { |
647 | entry = add_marker(name, format); | |
648 | if (IS_ERR(entry)) { | |
649 | ret = PTR_ERR(entry); | |
650 | goto end; | |
651 | } | |
8256e47c | 652 | } |
fb40bd78 MD |
653 | /* |
654 | * If we detect that a call_rcu is pending for this marker, | |
655 | * make sure it's executed now. | |
656 | */ | |
657 | if (entry->rcu_pending) | |
658 | rcu_barrier(); | |
659 | old = marker_entry_add_probe(entry, probe, probe_private); | |
660 | if (IS_ERR(old)) { | |
661 | ret = PTR_ERR(old); | |
8256e47c | 662 | goto end; |
fb40bd78 | 663 | } |
314de8a9 | 664 | mutex_unlock(&markers_mutex); |
fb40bd78 MD |
665 | marker_update_probes(); /* may update entry */ |
666 | mutex_lock(&markers_mutex); | |
667 | entry = get_marker(name); | |
668 | WARN_ON(!entry); | |
669 | entry->oldptr = old; | |
670 | entry->rcu_pending = 1; | |
671 | /* write rcu_pending before calling the RCU callback */ | |
672 | smp_wmb(); | |
6496968e MD |
673 | #ifdef CONFIG_PREEMPT_RCU |
674 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | |
675 | #endif | |
fb40bd78 | 676 | call_rcu(&entry->rcu, free_old_closure); |
8256e47c MD |
677 | end: |
678 | mutex_unlock(&markers_mutex); | |
8256e47c MD |
679 | return ret; |
680 | } | |
681 | EXPORT_SYMBOL_GPL(marker_probe_register); | |
682 | ||
683 | /** | |
684 | * marker_probe_unregister - Disconnect a probe from a marker | |
685 | * @name: marker name | |
fb40bd78 MD |
686 | * @probe: probe function pointer |
687 | * @probe_private: probe private data | |
8256e47c MD |
688 | * |
689 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | |
fb40bd78 MD |
690 | * We do not need to call a synchronize_sched to make sure the probes have |
691 | * finished running before doing a module unload, because the module unload | |
692 | * itself uses stop_machine(), which insures that every preempt disabled section | |
693 | * have finished. | |
8256e47c | 694 | */ |
fb40bd78 MD |
695 | int marker_probe_unregister(const char *name, |
696 | marker_probe_func *probe, void *probe_private) | |
8256e47c | 697 | { |
8256e47c | 698 | struct marker_entry *entry; |
fb40bd78 | 699 | struct marker_probe_closure *old; |
544adb41 | 700 | int ret = -ENOENT; |
8256e47c MD |
701 | |
702 | mutex_lock(&markers_mutex); | |
703 | entry = get_marker(name); | |
544adb41 | 704 | if (!entry) |
8256e47c | 705 | goto end; |
fb40bd78 MD |
706 | if (entry->rcu_pending) |
707 | rcu_barrier(); | |
708 | old = marker_entry_remove_probe(entry, probe, probe_private); | |
314de8a9 | 709 | mutex_unlock(&markers_mutex); |
fb40bd78 MD |
710 | marker_update_probes(); /* may update entry */ |
711 | mutex_lock(&markers_mutex); | |
712 | entry = get_marker(name); | |
544adb41 JJ |
713 | if (!entry) |
714 | goto end; | |
fb40bd78 MD |
715 | entry->oldptr = old; |
716 | entry->rcu_pending = 1; | |
717 | /* write rcu_pending before calling the RCU callback */ | |
718 | smp_wmb(); | |
6496968e MD |
719 | #ifdef CONFIG_PREEMPT_RCU |
720 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | |
721 | #endif | |
fb40bd78 MD |
722 | call_rcu(&entry->rcu, free_old_closure); |
723 | remove_marker(name); /* Ignore busy error message */ | |
544adb41 | 724 | ret = 0; |
8256e47c MD |
725 | end: |
726 | mutex_unlock(&markers_mutex); | |
fb40bd78 | 727 | return ret; |
8256e47c MD |
728 | } |
729 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | |
730 | ||
fb40bd78 MD |
731 | static struct marker_entry * |
732 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) | |
8256e47c | 733 | { |
8256e47c | 734 | struct marker_entry *entry; |
8256e47c | 735 | unsigned int i; |
fb40bd78 MD |
736 | struct hlist_head *head; |
737 | struct hlist_node *node; | |
8256e47c | 738 | |
8256e47c MD |
739 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { |
740 | head = &marker_table[i]; | |
741 | hlist_for_each_entry(entry, node, head, hlist) { | |
fb40bd78 MD |
742 | if (!entry->ptype) { |
743 | if (entry->single.func == probe | |
744 | && entry->single.probe_private | |
745 | == probe_private) | |
746 | return entry; | |
747 | } else { | |
748 | struct marker_probe_closure *closure; | |
749 | closure = entry->multi; | |
750 | for (i = 0; closure[i].func; i++) { | |
751 | if (closure[i].func == probe && | |
752 | closure[i].probe_private | |
753 | == probe_private) | |
754 | return entry; | |
755 | } | |
8256e47c MD |
756 | } |
757 | } | |
758 | } | |
fb40bd78 | 759 | return NULL; |
8256e47c | 760 | } |
8256e47c MD |
761 | |
762 | /** | |
fb40bd78 MD |
763 | * marker_probe_unregister_private_data - Disconnect a probe from a marker |
764 | * @probe: probe function | |
765 | * @probe_private: probe private data | |
8256e47c | 766 | * |
fb40bd78 MD |
767 | * Unregister a probe by providing the registered private data. |
768 | * Only removes the first marker found in hash table. | |
769 | * Return 0 on success or error value. | |
770 | * We do not need to call a synchronize_sched to make sure the probes have | |
771 | * finished running before doing a module unload, because the module unload | |
772 | * itself uses stop_machine(), which insures that every preempt disabled section | |
773 | * have finished. | |
8256e47c | 774 | */ |
fb40bd78 MD |
775 | int marker_probe_unregister_private_data(marker_probe_func *probe, |
776 | void *probe_private) | |
8256e47c MD |
777 | { |
778 | struct marker_entry *entry; | |
314de8a9 | 779 | int ret = 0; |
fb40bd78 | 780 | struct marker_probe_closure *old; |
8256e47c MD |
781 | |
782 | mutex_lock(&markers_mutex); | |
fb40bd78 | 783 | entry = get_marker_from_private_data(probe, probe_private); |
8256e47c MD |
784 | if (!entry) { |
785 | ret = -ENOENT; | |
786 | goto end; | |
787 | } | |
fb40bd78 MD |
788 | if (entry->rcu_pending) |
789 | rcu_barrier(); | |
790 | old = marker_entry_remove_probe(entry, NULL, probe_private); | |
8256e47c | 791 | mutex_unlock(&markers_mutex); |
fb40bd78 | 792 | marker_update_probes(); /* may update entry */ |
8256e47c | 793 | mutex_lock(&markers_mutex); |
fb40bd78 MD |
794 | entry = get_marker_from_private_data(probe, probe_private); |
795 | WARN_ON(!entry); | |
796 | entry->oldptr = old; | |
797 | entry->rcu_pending = 1; | |
798 | /* write rcu_pending before calling the RCU callback */ | |
799 | smp_wmb(); | |
6496968e MD |
800 | #ifdef CONFIG_PREEMPT_RCU |
801 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | |
802 | #endif | |
fb40bd78 MD |
803 | call_rcu(&entry->rcu, free_old_closure); |
804 | remove_marker(entry->name); /* Ignore busy error message */ | |
8256e47c MD |
805 | end: |
806 | mutex_unlock(&markers_mutex); | |
8256e47c MD |
807 | return ret; |
808 | } | |
fb40bd78 | 809 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); |
8256e47c MD |
810 | |
811 | /** | |
812 | * marker_get_private_data - Get a marker's probe private data | |
813 | * @name: marker name | |
fb40bd78 MD |
814 | * @probe: probe to match |
815 | * @num: get the nth matching probe's private data | |
8256e47c | 816 | * |
fb40bd78 MD |
817 | * Returns the nth private data pointer (starting from 0) matching, or an |
818 | * ERR_PTR. | |
8256e47c MD |
819 | * Returns the private data pointer, or an ERR_PTR. |
820 | * The private data pointer should _only_ be dereferenced if the caller is the | |
821 | * owner of the data, or its content could vanish. This is mostly used to | |
822 | * confirm that a caller is the owner of a registered probe. | |
823 | */ | |
fb40bd78 MD |
824 | void *marker_get_private_data(const char *name, marker_probe_func *probe, |
825 | int num) | |
8256e47c MD |
826 | { |
827 | struct hlist_head *head; | |
828 | struct hlist_node *node; | |
829 | struct marker_entry *e; | |
830 | size_t name_len = strlen(name) + 1; | |
831 | u32 hash = jhash(name, name_len-1, 0); | |
fb40bd78 | 832 | int i; |
8256e47c MD |
833 | |
834 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | |
835 | hlist_for_each_entry(e, node, head, hlist) { | |
836 | if (!strcmp(name, e->name)) { | |
fb40bd78 MD |
837 | if (!e->ptype) { |
838 | if (num == 0 && e->single.func == probe) | |
839 | return e->single.probe_private; | |
840 | else | |
841 | break; | |
842 | } else { | |
843 | struct marker_probe_closure *closure; | |
844 | int match = 0; | |
845 | closure = e->multi; | |
846 | for (i = 0; closure[i].func; i++) { | |
847 | if (closure[i].func != probe) | |
848 | continue; | |
849 | if (match++ == num) | |
850 | return closure[i].probe_private; | |
851 | } | |
852 | } | |
8256e47c MD |
853 | } |
854 | } | |
855 | return ERR_PTR(-ENOENT); | |
856 | } | |
857 | EXPORT_SYMBOL_GPL(marker_get_private_data); |