]>
Commit | Line | Data |
---|---|---|
bcea3f96 | 1 | // SPDX-License-Identifier: GPL-2.0 |
08d43a5f TZ |
2 | /* |
3 | * tracing_map - lock-free map for tracing | |
4 | * | |
08d43a5f TZ |
5 | * Copyright (C) 2015 Tom Zanussi <[email protected]> |
6 | * | |
7 | * tracing_map implementation inspired by lock-free map algorithms | |
8 | * originated by Dr. Cliff Click: | |
9 | * | |
10 | * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable | |
11 | * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf | |
12 | */ | |
13 | ||
14 | #include <linux/vmalloc.h> | |
15 | #include <linux/jhash.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/sort.h> | |
18 | ||
19 | #include "tracing_map.h" | |
20 | #include "trace.h" | |
21 | ||
22 | /* | |
23 | * NOTE: For a detailed description of the data structures used by | |
24 | * these functions (such as tracing_map_elt) please see the overview | |
25 | * of tracing_map data structures at the beginning of tracing_map.h. | |
26 | */ | |
27 | ||
28 | /** | |
29 | * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field | |
30 | * @elt: The tracing_map_elt | |
31 | * @i: The index of the given sum associated with the tracing_map_elt | |
32 | * @n: The value to add to the sum | |
33 | * | |
34 | * Add n to sum i associated with the specified tracing_map_elt | |
35 | * instance. The index i is the index returned by the call to | |
36 | * tracing_map_add_sum_field() when the tracing map was set up. | |
37 | */ | |
38 | void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
39 | { | |
40 | atomic64_add(n, &elt->fields[i].sum); | |
41 | } | |
42 | ||
43 | /** | |
44 | * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field | |
45 | * @elt: The tracing_map_elt | |
46 | * @i: The index of the given sum associated with the tracing_map_elt | |
47 | * | |
48 | * Retrieve the value of the sum i associated with the specified | |
49 | * tracing_map_elt instance. The index i is the index returned by the | |
50 | * call to tracing_map_add_sum_field() when the tracing map was set | |
51 | * up. | |
52 | * | |
53 | * Return: The sum associated with field i for elt. | |
54 | */ | |
55 | u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) | |
56 | { | |
57 | return (u64)atomic64_read(&elt->fields[i].sum); | |
58 | } | |
59 | ||
2734b629 TZ |
60 | /** |
61 | * tracing_map_set_var - Assign a tracing_map_elt's variable field | |
62 | * @elt: The tracing_map_elt | |
63 | * @i: The index of the given variable associated with the tracing_map_elt | |
64 | * @n: The value to assign | |
65 | * | |
66 | * Assign n to variable i associated with the specified tracing_map_elt | |
67 | * instance. The index i is the index returned by the call to | |
68 | * tracing_map_add_var() when the tracing map was set up. | |
69 | */ | |
70 | void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
71 | { | |
72 | atomic64_set(&elt->vars[i], n); | |
73 | elt->var_set[i] = true; | |
74 | } | |
75 | ||
76 | /** | |
77 | * tracing_map_var_set - Return whether or not a variable has been set | |
78 | * @elt: The tracing_map_elt | |
79 | * @i: The index of the given variable associated with the tracing_map_elt | |
80 | * | |
81 | * Return true if the variable has been set, false otherwise. The | |
82 | * index i is the index returned by the call to tracing_map_add_var() | |
83 | * when the tracing map was set up. | |
84 | */ | |
85 | bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) | |
86 | { | |
87 | return elt->var_set[i]; | |
88 | } | |
89 | ||
90 | /** | |
91 | * tracing_map_read_var - Return the value of a tracing_map_elt's variable field | |
92 | * @elt: The tracing_map_elt | |
93 | * @i: The index of the given variable associated with the tracing_map_elt | |
94 | * | |
95 | * Retrieve the value of the variable i associated with the specified | |
96 | * tracing_map_elt instance. The index i is the index returned by the | |
97 | * call to tracing_map_add_var() when the tracing map was set | |
98 | * up. | |
99 | * | |
100 | * Return: The variable value associated with field i for elt. | |
101 | */ | |
102 | u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) | |
103 | { | |
104 | return (u64)atomic64_read(&elt->vars[i]); | |
105 | } | |
106 | ||
107 | /** | |
108 | * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field | |
109 | * @elt: The tracing_map_elt | |
110 | * @i: The index of the given variable associated with the tracing_map_elt | |
111 | * | |
112 | * Retrieve the value of the variable i associated with the specified | |
113 | * tracing_map_elt instance, and reset the variable to the 'not set' | |
114 | * state. The index i is the index returned by the call to | |
115 | * tracing_map_add_var() when the tracing map was set up. The reset | |
116 | * essentially makes the variable a read-once variable if it's only | |
117 | * accessed using this function. | |
118 | * | |
119 | * Return: The variable value associated with field i for elt. | |
120 | */ | |
121 | u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) | |
122 | { | |
123 | elt->var_set[i] = false; | |
124 | return (u64)atomic64_read(&elt->vars[i]); | |
125 | } | |
126 | ||
08d43a5f TZ |
127 | int tracing_map_cmp_string(void *val_a, void *val_b) |
128 | { | |
129 | char *a = val_a; | |
130 | char *b = val_b; | |
131 | ||
132 | return strcmp(a, b); | |
133 | } | |
134 | ||
135 | int tracing_map_cmp_none(void *val_a, void *val_b) | |
136 | { | |
137 | return 0; | |
138 | } | |
139 | ||
140 | static int tracing_map_cmp_atomic64(void *val_a, void *val_b) | |
141 | { | |
142 | u64 a = atomic64_read((atomic64_t *)val_a); | |
143 | u64 b = atomic64_read((atomic64_t *)val_b); | |
144 | ||
145 | return (a > b) ? 1 : ((a < b) ? -1 : 0); | |
146 | } | |
147 | ||
148 | #define DEFINE_TRACING_MAP_CMP_FN(type) \ | |
149 | static int tracing_map_cmp_##type(void *val_a, void *val_b) \ | |
150 | { \ | |
151 | type a = *(type *)val_a; \ | |
152 | type b = *(type *)val_b; \ | |
153 | \ | |
154 | return (a > b) ? 1 : ((a < b) ? -1 : 0); \ | |
155 | } | |
156 | ||
157 | DEFINE_TRACING_MAP_CMP_FN(s64); | |
158 | DEFINE_TRACING_MAP_CMP_FN(u64); | |
159 | DEFINE_TRACING_MAP_CMP_FN(s32); | |
160 | DEFINE_TRACING_MAP_CMP_FN(u32); | |
161 | DEFINE_TRACING_MAP_CMP_FN(s16); | |
162 | DEFINE_TRACING_MAP_CMP_FN(u16); | |
163 | DEFINE_TRACING_MAP_CMP_FN(s8); | |
164 | DEFINE_TRACING_MAP_CMP_FN(u8); | |
165 | ||
166 | tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, | |
167 | int field_is_signed) | |
168 | { | |
169 | tracing_map_cmp_fn_t fn = tracing_map_cmp_none; | |
170 | ||
171 | switch (field_size) { | |
172 | case 8: | |
173 | if (field_is_signed) | |
174 | fn = tracing_map_cmp_s64; | |
175 | else | |
176 | fn = tracing_map_cmp_u64; | |
177 | break; | |
178 | case 4: | |
179 | if (field_is_signed) | |
180 | fn = tracing_map_cmp_s32; | |
181 | else | |
182 | fn = tracing_map_cmp_u32; | |
183 | break; | |
184 | case 2: | |
185 | if (field_is_signed) | |
186 | fn = tracing_map_cmp_s16; | |
187 | else | |
188 | fn = tracing_map_cmp_u16; | |
189 | break; | |
190 | case 1: | |
191 | if (field_is_signed) | |
192 | fn = tracing_map_cmp_s8; | |
193 | else | |
194 | fn = tracing_map_cmp_u8; | |
195 | break; | |
196 | } | |
197 | ||
198 | return fn; | |
199 | } | |
200 | ||
201 | static int tracing_map_add_field(struct tracing_map *map, | |
202 | tracing_map_cmp_fn_t cmp_fn) | |
203 | { | |
204 | int ret = -EINVAL; | |
205 | ||
206 | if (map->n_fields < TRACING_MAP_FIELDS_MAX) { | |
207 | ret = map->n_fields; | |
208 | map->fields[map->n_fields++].cmp_fn = cmp_fn; | |
209 | } | |
210 | ||
211 | return ret; | |
212 | } | |
213 | ||
214 | /** | |
215 | * tracing_map_add_sum_field - Add a field describing a tracing_map sum | |
216 | * @map: The tracing_map | |
217 | * | |
218 | * Add a sum field to the key and return the index identifying it in | |
219 | * the map and associated tracing_map_elts. This is the index used | |
220 | * for instance to update a sum for a particular tracing_map_elt using | |
221 | * tracing_map_update_sum() or reading it via tracing_map_read_sum(). | |
222 | * | |
223 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 224 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
225 | */ |
226 | int tracing_map_add_sum_field(struct tracing_map *map) | |
227 | { | |
228 | return tracing_map_add_field(map, tracing_map_cmp_atomic64); | |
229 | } | |
230 | ||
2734b629 TZ |
231 | /** |
232 | * tracing_map_add_var - Add a field describing a tracing_map var | |
233 | * @map: The tracing_map | |
234 | * | |
235 | * Add a var to the map and return the index identifying it in the map | |
236 | * and associated tracing_map_elts. This is the index used for | |
237 | * instance to update a var for a particular tracing_map_elt using | |
238 | * tracing_map_update_var() or reading it via tracing_map_read_var(). | |
239 | * | |
240 | * Return: The index identifying the var in the map and associated | |
241 | * tracing_map_elts, or -EINVAL on error. | |
242 | */ | |
243 | int tracing_map_add_var(struct tracing_map *map) | |
244 | { | |
245 | int ret = -EINVAL; | |
246 | ||
247 | if (map->n_vars < TRACING_MAP_VARS_MAX) | |
248 | ret = map->n_vars++; | |
249 | ||
250 | return ret; | |
251 | } | |
252 | ||
08d43a5f TZ |
253 | /** |
254 | * tracing_map_add_key_field - Add a field describing a tracing_map key | |
255 | * @map: The tracing_map | |
256 | * @offset: The offset within the key | |
257 | * @cmp_fn: The comparison function that will be used to sort on the key | |
258 | * | |
259 | * Let the map know there is a key and that if it's used as a sort key | |
260 | * to use cmp_fn. | |
261 | * | |
262 | * A key can be a subset of a compound key; for that purpose, the | |
263 | * offset param is used to describe where within the the compound key | |
264 | * the key referenced by this key field resides. | |
265 | * | |
266 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 267 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
268 | */ |
269 | int tracing_map_add_key_field(struct tracing_map *map, | |
270 | unsigned int offset, | |
271 | tracing_map_cmp_fn_t cmp_fn) | |
272 | ||
273 | { | |
274 | int idx = tracing_map_add_field(map, cmp_fn); | |
275 | ||
276 | if (idx < 0) | |
277 | return idx; | |
278 | ||
279 | map->fields[idx].offset = offset; | |
280 | ||
281 | map->key_idx[map->n_keys++] = idx; | |
282 | ||
283 | return idx; | |
284 | } | |
285 | ||
286 | void tracing_map_array_clear(struct tracing_map_array *a) | |
287 | { | |
288 | unsigned int i; | |
289 | ||
290 | if (!a->pages) | |
291 | return; | |
292 | ||
293 | for (i = 0; i < a->n_pages; i++) | |
294 | memset(a->pages[i], 0, PAGE_SIZE); | |
295 | } | |
296 | ||
297 | void tracing_map_array_free(struct tracing_map_array *a) | |
298 | { | |
299 | unsigned int i; | |
300 | ||
301 | if (!a) | |
302 | return; | |
303 | ||
475bb3c6 CH |
304 | if (!a->pages) |
305 | goto free; | |
08d43a5f TZ |
306 | |
307 | for (i = 0; i < a->n_pages; i++) { | |
308 | if (!a->pages[i]) | |
309 | break; | |
310 | free_page((unsigned long)a->pages[i]); | |
311 | } | |
475bb3c6 CH |
312 | |
313 | kfree(a->pages); | |
314 | ||
315 | free: | |
316 | kfree(a); | |
08d43a5f TZ |
317 | } |
318 | ||
319 | struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, | |
320 | unsigned int entry_size) | |
321 | { | |
322 | struct tracing_map_array *a; | |
323 | unsigned int i; | |
324 | ||
325 | a = kzalloc(sizeof(*a), GFP_KERNEL); | |
326 | if (!a) | |
327 | return NULL; | |
328 | ||
329 | a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); | |
330 | a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); | |
331 | a->n_pages = n_elts / a->entries_per_page; | |
332 | if (!a->n_pages) | |
333 | a->n_pages = 1; | |
334 | a->entry_shift = fls(a->entries_per_page) - 1; | |
335 | a->entry_mask = (1 << a->entry_shift) - 1; | |
336 | ||
337 | a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL); | |
338 | if (!a->pages) | |
339 | goto free; | |
340 | ||
341 | for (i = 0; i < a->n_pages; i++) { | |
342 | a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | |
343 | if (!a->pages[i]) | |
344 | goto free; | |
345 | } | |
346 | out: | |
347 | return a; | |
348 | free: | |
349 | tracing_map_array_free(a); | |
350 | a = NULL; | |
351 | ||
352 | goto out; | |
353 | } | |
354 | ||
355 | static void tracing_map_elt_clear(struct tracing_map_elt *elt) | |
356 | { | |
357 | unsigned i; | |
358 | ||
359 | for (i = 0; i < elt->map->n_fields; i++) | |
360 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | |
361 | atomic64_set(&elt->fields[i].sum, 0); | |
362 | ||
2734b629 TZ |
363 | for (i = 0; i < elt->map->n_vars; i++) { |
364 | atomic64_set(&elt->vars[i], 0); | |
365 | elt->var_set[i] = false; | |
366 | } | |
367 | ||
08d43a5f TZ |
368 | if (elt->map->ops && elt->map->ops->elt_clear) |
369 | elt->map->ops->elt_clear(elt); | |
370 | } | |
371 | ||
372 | static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) | |
373 | { | |
374 | unsigned int i; | |
375 | ||
376 | tracing_map_elt_clear(elt); | |
377 | ||
378 | for (i = 0; i < elt->map->n_fields; i++) { | |
379 | elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn; | |
380 | ||
381 | if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64) | |
382 | elt->fields[i].offset = elt->map->fields[i].offset; | |
383 | } | |
384 | } | |
385 | ||
386 | static void tracing_map_elt_free(struct tracing_map_elt *elt) | |
387 | { | |
388 | if (!elt) | |
389 | return; | |
390 | ||
391 | if (elt->map->ops && elt->map->ops->elt_free) | |
392 | elt->map->ops->elt_free(elt); | |
393 | kfree(elt->fields); | |
2734b629 TZ |
394 | kfree(elt->vars); |
395 | kfree(elt->var_set); | |
08d43a5f TZ |
396 | kfree(elt->key); |
397 | kfree(elt); | |
398 | } | |
399 | ||
400 | static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) | |
401 | { | |
402 | struct tracing_map_elt *elt; | |
403 | int err = 0; | |
404 | ||
405 | elt = kzalloc(sizeof(*elt), GFP_KERNEL); | |
406 | if (!elt) | |
407 | return ERR_PTR(-ENOMEM); | |
408 | ||
409 | elt->map = map; | |
410 | ||
411 | elt->key = kzalloc(map->key_size, GFP_KERNEL); | |
412 | if (!elt->key) { | |
413 | err = -ENOMEM; | |
414 | goto free; | |
415 | } | |
416 | ||
417 | elt->fields = kcalloc(map->n_fields, sizeof(*elt->fields), GFP_KERNEL); | |
418 | if (!elt->fields) { | |
419 | err = -ENOMEM; | |
420 | goto free; | |
421 | } | |
422 | ||
2734b629 TZ |
423 | elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); |
424 | if (!elt->vars) { | |
425 | err = -ENOMEM; | |
426 | goto free; | |
427 | } | |
428 | ||
429 | elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); | |
430 | if (!elt->var_set) { | |
431 | err = -ENOMEM; | |
432 | goto free; | |
433 | } | |
434 | ||
08d43a5f TZ |
435 | tracing_map_elt_init_fields(elt); |
436 | ||
437 | if (map->ops && map->ops->elt_alloc) { | |
438 | err = map->ops->elt_alloc(elt); | |
439 | if (err) | |
440 | goto free; | |
441 | } | |
442 | return elt; | |
443 | free: | |
444 | tracing_map_elt_free(elt); | |
445 | ||
446 | return ERR_PTR(err); | |
447 | } | |
448 | ||
449 | static struct tracing_map_elt *get_free_elt(struct tracing_map *map) | |
450 | { | |
451 | struct tracing_map_elt *elt = NULL; | |
452 | int idx; | |
453 | ||
454 | idx = atomic_inc_return(&map->next_elt); | |
455 | if (idx < map->max_elts) { | |
456 | elt = *(TRACING_MAP_ELT(map->elts, idx)); | |
457 | if (map->ops && map->ops->elt_init) | |
458 | map->ops->elt_init(elt); | |
459 | } | |
460 | ||
461 | return elt; | |
462 | } | |
463 | ||
464 | static void tracing_map_free_elts(struct tracing_map *map) | |
465 | { | |
466 | unsigned int i; | |
467 | ||
468 | if (!map->elts) | |
469 | return; | |
470 | ||
6e4cf657 | 471 | for (i = 0; i < map->max_elts; i++) { |
08d43a5f | 472 | tracing_map_elt_free(*(TRACING_MAP_ELT(map->elts, i))); |
6e4cf657 TZ |
473 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
474 | } | |
08d43a5f TZ |
475 | |
476 | tracing_map_array_free(map->elts); | |
6e4cf657 | 477 | map->elts = NULL; |
08d43a5f TZ |
478 | } |
479 | ||
480 | static int tracing_map_alloc_elts(struct tracing_map *map) | |
481 | { | |
482 | unsigned int i; | |
483 | ||
484 | map->elts = tracing_map_array_alloc(map->max_elts, | |
485 | sizeof(struct tracing_map_elt *)); | |
486 | if (!map->elts) | |
487 | return -ENOMEM; | |
488 | ||
489 | for (i = 0; i < map->max_elts; i++) { | |
490 | *(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map); | |
6e4cf657 TZ |
491 | if (IS_ERR(*(TRACING_MAP_ELT(map->elts, i)))) { |
492 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; | |
08d43a5f TZ |
493 | tracing_map_free_elts(map); |
494 | ||
495 | return -ENOMEM; | |
496 | } | |
497 | } | |
498 | ||
499 | return 0; | |
500 | } | |
501 | ||
502 | static inline bool keys_match(void *key, void *test_key, unsigned key_size) | |
503 | { | |
504 | bool match = true; | |
505 | ||
506 | if (memcmp(key, test_key, key_size)) | |
507 | match = false; | |
508 | ||
509 | return match; | |
510 | } | |
511 | ||
512 | static inline struct tracing_map_elt * | |
513 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |
514 | { | |
515 | u32 idx, key_hash, test_key; | |
cbf4100e | 516 | int dup_try = 0; |
08d43a5f | 517 | struct tracing_map_entry *entry; |
cbf4100e | 518 | struct tracing_map_elt *val; |
08d43a5f TZ |
519 | |
520 | key_hash = jhash(key, map->key_size, 0); | |
521 | if (key_hash == 0) | |
522 | key_hash = 1; | |
523 | idx = key_hash >> (32 - (map->map_bits + 1)); | |
524 | ||
525 | while (1) { | |
526 | idx &= (map->map_size - 1); | |
527 | entry = TRACING_MAP_ENTRY(map->map, idx); | |
528 | test_key = entry->key; | |
529 | ||
cbf4100e VP |
530 | if (test_key && test_key == key_hash) { |
531 | val = READ_ONCE(entry->val); | |
532 | if (val && | |
533 | keys_match(key, val->key, map->key_size)) { | |
534 | if (!lookup_only) | |
535 | atomic64_inc(&map->hits); | |
536 | return val; | |
537 | } else if (unlikely(!val)) { | |
538 | /* | |
539 | * The key is present. But, val (pointer to elt | |
540 | * struct) is still NULL. which means some other | |
541 | * thread is in the process of inserting an | |
542 | * element. | |
543 | * | |
544 | * On top of that, it's key_hash is same as the | |
545 | * one being inserted right now. So, it's | |
546 | * possible that the element has the same | |
547 | * key as well. | |
548 | */ | |
549 | ||
550 | dup_try++; | |
551 | if (dup_try > map->map_size) { | |
552 | atomic64_inc(&map->drops); | |
553 | break; | |
554 | } | |
555 | continue; | |
556 | } | |
08d43a5f TZ |
557 | } |
558 | ||
559 | if (!test_key) { | |
560 | if (lookup_only) | |
561 | break; | |
562 | ||
563 | if (!cmpxchg(&entry->key, 0, key_hash)) { | |
564 | struct tracing_map_elt *elt; | |
565 | ||
566 | elt = get_free_elt(map); | |
567 | if (!elt) { | |
568 | atomic64_inc(&map->drops); | |
569 | entry->key = 0; | |
570 | break; | |
571 | } | |
572 | ||
573 | memcpy(elt->key, key, map->key_size); | |
574 | entry->val = elt; | |
575 | atomic64_inc(&map->hits); | |
576 | ||
577 | return entry->val; | |
cbf4100e VP |
578 | } else { |
579 | /* | |
580 | * cmpxchg() failed. Loop around once | |
581 | * more to check what key was inserted. | |
582 | */ | |
583 | dup_try++; | |
584 | continue; | |
08d43a5f TZ |
585 | } |
586 | } | |
587 | ||
588 | idx++; | |
589 | } | |
590 | ||
591 | return NULL; | |
592 | } | |
593 | ||
594 | /** | |
595 | * tracing_map_insert - Insert key and/or retrieve val from a tracing_map | |
596 | * @map: The tracing_map to insert into | |
597 | * @key: The key to insert | |
598 | * | |
599 | * Inserts a key into a tracing_map and creates and returns a new | |
600 | * tracing_map_elt for it, or if the key has already been inserted by | |
601 | * a previous call, returns the tracing_map_elt already associated | |
602 | * with it. When the map was created, the number of elements to be | |
603 | * allocated for the map was specified (internally maintained as | |
604 | * 'max_elts' in struct tracing_map), and that number of | |
605 | * tracing_map_elts was created by tracing_map_init(). This is the | |
606 | * pre-allocated pool of tracing_map_elts that tracing_map_insert() | |
607 | * will allocate from when adding new keys. Once that pool is | |
608 | * exhausted, tracing_map_insert() is useless and will return NULL to | |
609 | * signal that state. There are two user-visible tracing_map | |
610 | * variables, 'hits' and 'drops', which are updated by this function. | |
611 | * Every time an element is either successfully inserted or retrieved, | |
612 | * the 'hits' value is incrememented. Every time an element insertion | |
613 | * fails, the 'drops' value is incremented. | |
614 | * | |
615 | * This is a lock-free tracing map insertion function implementing a | |
616 | * modified form of Cliff Click's basic insertion algorithm. It | |
617 | * requires the table size be a power of two. To prevent any | |
618 | * possibility of an infinite loop we always make the internal table | |
619 | * size double the size of the requested table size (max_elts * 2). | |
620 | * Likewise, we never reuse a slot or resize or delete elements - when | |
621 | * we've reached max_elts entries, we simply return NULL once we've | |
622 | * run out of entries. Readers can at any point in time traverse the | |
623 | * tracing map and safely access the key/val pairs. | |
624 | * | |
625 | * Return: the tracing_map_elt pointer val associated with the key. | |
626 | * If this was a newly inserted key, the val will be a newly allocated | |
627 | * and associated tracing_map_elt pointer val. If the key wasn't | |
628 | * found and the pool of tracing_map_elts has been exhausted, NULL is | |
629 | * returned and no further insertions will succeed. | |
630 | */ | |
631 | struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key) | |
632 | { | |
633 | return __tracing_map_insert(map, key, false); | |
634 | } | |
635 | ||
636 | /** | |
637 | * tracing_map_lookup - Retrieve val from a tracing_map | |
638 | * @map: The tracing_map to perform the lookup on | |
639 | * @key: The key to look up | |
640 | * | |
641 | * Looks up key in tracing_map and if found returns the matching | |
642 | * tracing_map_elt. This is a lock-free lookup; see | |
643 | * tracing_map_insert() for details on tracing_map and how it works. | |
644 | * Every time an element is retrieved, the 'hits' value is | |
645 | * incrememented. There is one user-visible tracing_map variable, | |
646 | * 'hits', which is updated by this function. Every time an element | |
647 | * is successfully retrieved, the 'hits' value is incrememented. The | |
648 | * 'drops' value is never updated by this function. | |
649 | * | |
650 | * Return: the tracing_map_elt pointer val associated with the key. | |
651 | * If the key wasn't found, NULL is returned. | |
652 | */ | |
653 | struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key) | |
654 | { | |
655 | return __tracing_map_insert(map, key, true); | |
656 | } | |
657 | ||
658 | /** | |
659 | * tracing_map_destroy - Destroy a tracing_map | |
660 | * @map: The tracing_map to destroy | |
661 | * | |
662 | * Frees a tracing_map along with its associated array of | |
663 | * tracing_map_elts. | |
664 | * | |
665 | * Callers should make sure there are no readers or writers actively | |
666 | * reading or inserting into the map before calling this. | |
667 | */ | |
668 | void tracing_map_destroy(struct tracing_map *map) | |
669 | { | |
670 | if (!map) | |
671 | return; | |
672 | ||
673 | tracing_map_free_elts(map); | |
674 | ||
675 | tracing_map_array_free(map->map); | |
676 | kfree(map); | |
677 | } | |
678 | ||
679 | /** | |
680 | * tracing_map_clear - Clear a tracing_map | |
681 | * @map: The tracing_map to clear | |
682 | * | |
683 | * Resets the tracing map to a cleared or initial state. The | |
684 | * tracing_map_elts are all cleared, and the array of struct | |
685 | * tracing_map_entry is reset to an initialized state. | |
686 | * | |
687 | * Callers should make sure there are no writers actively inserting | |
688 | * into the map before calling this. | |
689 | */ | |
690 | void tracing_map_clear(struct tracing_map *map) | |
691 | { | |
692 | unsigned int i; | |
693 | ||
694 | atomic_set(&map->next_elt, -1); | |
695 | atomic64_set(&map->hits, 0); | |
696 | atomic64_set(&map->drops, 0); | |
697 | ||
698 | tracing_map_array_clear(map->map); | |
699 | ||
700 | for (i = 0; i < map->max_elts; i++) | |
701 | tracing_map_elt_clear(*(TRACING_MAP_ELT(map->elts, i))); | |
702 | } | |
703 | ||
704 | static void set_sort_key(struct tracing_map *map, | |
705 | struct tracing_map_sort_key *sort_key) | |
706 | { | |
707 | map->sort_key = *sort_key; | |
708 | } | |
709 | ||
710 | /** | |
711 | * tracing_map_create - Create a lock-free map and element pool | |
712 | * @map_bits: The size of the map (2 ** map_bits) | |
713 | * @key_size: The size of the key for the map in bytes | |
714 | * @ops: Optional client-defined tracing_map_ops instance | |
715 | * @private_data: Client data associated with the map | |
716 | * | |
717 | * Creates and sets up a map to contain 2 ** map_bits number of | |
718 | * elements (internally maintained as 'max_elts' in struct | |
719 | * tracing_map). Before using, map fields should be added to the map | |
720 | * with tracing_map_add_sum_field() and tracing_map_add_key_field(). | |
721 | * tracing_map_init() should then be called to allocate the array of | |
722 | * tracing_map_elts, in order to avoid allocating anything in the map | |
723 | * insertion path. The user-specified map size reflects the maximum | |
724 | * number of elements that can be contained in the table requested by | |
725 | * the user - internally we double that in order to keep the table | |
726 | * sparse and keep collisions manageable. | |
727 | * | |
728 | * A tracing_map is a special-purpose map designed to aggregate or | |
729 | * 'sum' one or more values associated with a specific object of type | |
730 | * tracing_map_elt, which is attached by the map to a given key. | |
731 | * | |
732 | * tracing_map_create() sets up the map itself, and provides | |
733 | * operations for inserting tracing_map_elts, but doesn't allocate the | |
734 | * tracing_map_elts themselves, or provide a means for describing the | |
735 | * keys or sums associated with the tracing_map_elts. All | |
736 | * tracing_map_elts for a given map have the same set of sums and | |
737 | * keys, which are defined by the client using the functions | |
738 | * tracing_map_add_key_field() and tracing_map_add_sum_field(). Once | |
739 | * the fields are defined, the pool of elements allocated for the map | |
740 | * can be created, which occurs when the client code calls | |
741 | * tracing_map_init(). | |
742 | * | |
743 | * When tracing_map_init() returns, tracing_map_elt elements can be | |
744 | * inserted into the map using tracing_map_insert(). When called, | |
745 | * tracing_map_insert() grabs a free tracing_map_elt from the pool, or | |
746 | * finds an existing match in the map and in either case returns it. | |
747 | * The client can then use tracing_map_update_sum() and | |
748 | * tracing_map_read_sum() to update or read a given sum field for the | |
749 | * tracing_map_elt. | |
750 | * | |
751 | * The client can at any point retrieve and traverse the current set | |
752 | * of inserted tracing_map_elts in a tracing_map, via | |
753 | * tracing_map_sort_entries(). Sorting can be done on any field, | |
754 | * including keys. | |
755 | * | |
756 | * See tracing_map.h for a description of tracing_map_ops. | |
757 | * | |
758 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
759 | */ | |
760 | struct tracing_map *tracing_map_create(unsigned int map_bits, | |
761 | unsigned int key_size, | |
762 | const struct tracing_map_ops *ops, | |
763 | void *private_data) | |
764 | { | |
765 | struct tracing_map *map; | |
766 | unsigned int i; | |
767 | ||
768 | if (map_bits < TRACING_MAP_BITS_MIN || | |
769 | map_bits > TRACING_MAP_BITS_MAX) | |
770 | return ERR_PTR(-EINVAL); | |
771 | ||
772 | map = kzalloc(sizeof(*map), GFP_KERNEL); | |
773 | if (!map) | |
774 | return ERR_PTR(-ENOMEM); | |
775 | ||
776 | map->map_bits = map_bits; | |
777 | map->max_elts = (1 << map_bits); | |
778 | atomic_set(&map->next_elt, -1); | |
779 | ||
780 | map->map_size = (1 << (map_bits + 1)); | |
781 | map->ops = ops; | |
782 | ||
783 | map->private_data = private_data; | |
784 | ||
785 | map->map = tracing_map_array_alloc(map->map_size, | |
786 | sizeof(struct tracing_map_entry)); | |
787 | if (!map->map) | |
788 | goto free; | |
789 | ||
790 | map->key_size = key_size; | |
791 | for (i = 0; i < TRACING_MAP_KEYS_MAX; i++) | |
792 | map->key_idx[i] = -1; | |
793 | out: | |
794 | return map; | |
795 | free: | |
796 | tracing_map_destroy(map); | |
797 | map = ERR_PTR(-ENOMEM); | |
798 | ||
799 | goto out; | |
800 | } | |
801 | ||
802 | /** | |
803 | * tracing_map_init - Allocate and clear a map's tracing_map_elts | |
804 | * @map: The tracing_map to initialize | |
805 | * | |
806 | * Allocates a clears a pool of tracing_map_elts equal to the | |
807 | * user-specified size of 2 ** map_bits (internally maintained as | |
808 | * 'max_elts' in struct tracing_map). Before using, the map fields | |
809 | * should be added to the map with tracing_map_add_sum_field() and | |
810 | * tracing_map_add_key_field(). tracing_map_init() should then be | |
811 | * called to allocate the array of tracing_map_elts, in order to avoid | |
812 | * allocating anything in the map insertion path. The user-specified | |
813 | * map size reflects the max number of elements requested by the user | |
814 | * - internally we double that in order to keep the table sparse and | |
815 | * keep collisions manageable. | |
816 | * | |
817 | * See tracing_map.h for a description of tracing_map_ops. | |
818 | * | |
819 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
820 | */ | |
821 | int tracing_map_init(struct tracing_map *map) | |
822 | { | |
823 | int err; | |
824 | ||
825 | if (map->n_fields < 2) | |
826 | return -EINVAL; /* need at least 1 key and 1 val */ | |
827 | ||
828 | err = tracing_map_alloc_elts(map); | |
829 | if (err) | |
830 | return err; | |
831 | ||
832 | tracing_map_clear(map); | |
833 | ||
834 | return err; | |
835 | } | |
836 | ||
837 | static int cmp_entries_dup(const struct tracing_map_sort_entry **a, | |
838 | const struct tracing_map_sort_entry **b) | |
839 | { | |
840 | int ret = 0; | |
841 | ||
842 | if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) | |
843 | ret = 1; | |
844 | ||
845 | return ret; | |
846 | } | |
847 | ||
848 | static int cmp_entries_sum(const struct tracing_map_sort_entry **a, | |
849 | const struct tracing_map_sort_entry **b) | |
850 | { | |
851 | const struct tracing_map_elt *elt_a, *elt_b; | |
852 | struct tracing_map_sort_key *sort_key; | |
853 | struct tracing_map_field *field; | |
854 | tracing_map_cmp_fn_t cmp_fn; | |
855 | void *val_a, *val_b; | |
856 | int ret = 0; | |
857 | ||
858 | elt_a = (*a)->elt; | |
859 | elt_b = (*b)->elt; | |
860 | ||
861 | sort_key = &elt_a->map->sort_key; | |
862 | ||
863 | field = &elt_a->fields[sort_key->field_idx]; | |
864 | cmp_fn = field->cmp_fn; | |
865 | ||
866 | val_a = &elt_a->fields[sort_key->field_idx].sum; | |
867 | val_b = &elt_b->fields[sort_key->field_idx].sum; | |
868 | ||
869 | ret = cmp_fn(val_a, val_b); | |
870 | if (sort_key->descending) | |
871 | ret = -ret; | |
872 | ||
873 | return ret; | |
874 | } | |
875 | ||
876 | static int cmp_entries_key(const struct tracing_map_sort_entry **a, | |
877 | const struct tracing_map_sort_entry **b) | |
878 | { | |
879 | const struct tracing_map_elt *elt_a, *elt_b; | |
880 | struct tracing_map_sort_key *sort_key; | |
881 | struct tracing_map_field *field; | |
882 | tracing_map_cmp_fn_t cmp_fn; | |
883 | void *val_a, *val_b; | |
884 | int ret = 0; | |
885 | ||
886 | elt_a = (*a)->elt; | |
887 | elt_b = (*b)->elt; | |
888 | ||
889 | sort_key = &elt_a->map->sort_key; | |
890 | ||
891 | field = &elt_a->fields[sort_key->field_idx]; | |
892 | ||
893 | cmp_fn = field->cmp_fn; | |
894 | ||
895 | val_a = elt_a->key + field->offset; | |
896 | val_b = elt_b->key + field->offset; | |
897 | ||
898 | ret = cmp_fn(val_a, val_b); | |
899 | if (sort_key->descending) | |
900 | ret = -ret; | |
901 | ||
902 | return ret; | |
903 | } | |
904 | ||
905 | static void destroy_sort_entry(struct tracing_map_sort_entry *entry) | |
906 | { | |
907 | if (!entry) | |
908 | return; | |
909 | ||
910 | if (entry->elt_copied) | |
911 | tracing_map_elt_free(entry->elt); | |
912 | ||
913 | kfree(entry); | |
914 | } | |
915 | ||
916 | /** | |
917 | * tracing_map_destroy_sort_entries - Destroy an array of sort entries | |
918 | * @entries: The entries to destroy | |
919 | * @n_entries: The number of entries in the array | |
920 | * | |
921 | * Destroy the elements returned by a tracing_map_sort_entries() call. | |
922 | */ | |
923 | void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, | |
924 | unsigned int n_entries) | |
925 | { | |
926 | unsigned int i; | |
927 | ||
928 | for (i = 0; i < n_entries; i++) | |
929 | destroy_sort_entry(entries[i]); | |
930 | ||
931 | vfree(entries); | |
932 | } | |
933 | ||
934 | static struct tracing_map_sort_entry * | |
935 | create_sort_entry(void *key, struct tracing_map_elt *elt) | |
936 | { | |
937 | struct tracing_map_sort_entry *sort_entry; | |
938 | ||
939 | sort_entry = kzalloc(sizeof(*sort_entry), GFP_KERNEL); | |
940 | if (!sort_entry) | |
941 | return NULL; | |
942 | ||
943 | sort_entry->key = key; | |
944 | sort_entry->elt = elt; | |
945 | ||
946 | return sort_entry; | |
947 | } | |
948 | ||
c193707d | 949 | static void detect_dups(struct tracing_map_sort_entry **sort_entries, |
08d43a5f TZ |
950 | int n_entries, unsigned int key_size) |
951 | { | |
952 | unsigned int dups = 0, total_dups = 0; | |
c193707d | 953 | int i; |
08d43a5f TZ |
954 | void *key; |
955 | ||
956 | if (n_entries < 2) | |
c193707d | 957 | return; |
08d43a5f TZ |
958 | |
959 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
960 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | |
961 | ||
962 | key = sort_entries[0]->key; | |
963 | for (i = 1; i < n_entries; i++) { | |
964 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | |
965 | dups++; total_dups++; | |
08d43a5f TZ |
966 | continue; |
967 | } | |
968 | key = sort_entries[i]->key; | |
969 | dups = 0; | |
970 | } | |
971 | ||
c193707d VP |
972 | WARN_ONCE(total_dups > 0, |
973 | "Duplicates detected: %d\n", total_dups); | |
08d43a5f TZ |
974 | } |
975 | ||
976 | static bool is_key(struct tracing_map *map, unsigned int field_idx) | |
977 | { | |
978 | unsigned int i; | |
979 | ||
980 | for (i = 0; i < map->n_keys; i++) | |
981 | if (map->key_idx[i] == field_idx) | |
982 | return true; | |
983 | return false; | |
984 | } | |
985 | ||
986 | static void sort_secondary(struct tracing_map *map, | |
987 | const struct tracing_map_sort_entry **entries, | |
988 | unsigned int n_entries, | |
989 | struct tracing_map_sort_key *primary_key, | |
990 | struct tracing_map_sort_key *secondary_key) | |
991 | { | |
992 | int (*primary_fn)(const struct tracing_map_sort_entry **, | |
993 | const struct tracing_map_sort_entry **); | |
994 | int (*secondary_fn)(const struct tracing_map_sort_entry **, | |
995 | const struct tracing_map_sort_entry **); | |
996 | unsigned i, start = 0, n_sub = 1; | |
997 | ||
998 | if (is_key(map, primary_key->field_idx)) | |
999 | primary_fn = cmp_entries_key; | |
1000 | else | |
1001 | primary_fn = cmp_entries_sum; | |
1002 | ||
1003 | if (is_key(map, secondary_key->field_idx)) | |
1004 | secondary_fn = cmp_entries_key; | |
1005 | else | |
1006 | secondary_fn = cmp_entries_sum; | |
1007 | ||
1008 | for (i = 0; i < n_entries - 1; i++) { | |
1009 | const struct tracing_map_sort_entry **a = &entries[i]; | |
1010 | const struct tracing_map_sort_entry **b = &entries[i + 1]; | |
1011 | ||
1012 | if (primary_fn(a, b) == 0) { | |
1013 | n_sub++; | |
1014 | if (i < n_entries - 2) | |
1015 | continue; | |
1016 | } | |
1017 | ||
1018 | if (n_sub < 2) { | |
1019 | start = i + 1; | |
1020 | n_sub = 1; | |
1021 | continue; | |
1022 | } | |
1023 | ||
1024 | set_sort_key(map, secondary_key); | |
1025 | sort(&entries[start], n_sub, | |
1026 | sizeof(struct tracing_map_sort_entry *), | |
1027 | (int (*)(const void *, const void *))secondary_fn, NULL); | |
1028 | set_sort_key(map, primary_key); | |
1029 | ||
1030 | start = i + 1; | |
1031 | n_sub = 1; | |
1032 | } | |
1033 | } | |
1034 | ||
1035 | /** | |
1036 | * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map | |
1037 | * @map: The tracing_map | |
1038 | * @sort_key: The sort key to use for sorting | |
1039 | * @sort_entries: outval: pointer to allocated and sorted array of entries | |
1040 | * | |
1041 | * tracing_map_sort_entries() sorts the current set of entries in the | |
1042 | * map and returns the list of tracing_map_sort_entries containing | |
1043 | * them to the client in the sort_entries param. The client can | |
1044 | * access the struct tracing_map_elt element of interest directly as | |
1045 | * the 'elt' field of a returned struct tracing_map_sort_entry object. | |
1046 | * | |
1047 | * The sort_key has only two fields: idx and descending. 'idx' refers | |
1048 | * to the index of the field added via tracing_map_add_sum_field() or | |
1049 | * tracing_map_add_key_field() when the tracing_map was initialized. | |
1050 | * 'descending' is a flag that if set reverses the sort order, which | |
1051 | * by default is ascending. | |
1052 | * | |
1053 | * The client should not hold on to the returned array but should use | |
1054 | * it and call tracing_map_destroy_sort_entries() when done. | |
1055 | * | |
1056 | * Return: the number of sort_entries in the struct tracing_map_sort_entry | |
1057 | * array, negative on error | |
1058 | */ | |
1059 | int tracing_map_sort_entries(struct tracing_map *map, | |
1060 | struct tracing_map_sort_key *sort_keys, | |
1061 | unsigned int n_sort_keys, | |
1062 | struct tracing_map_sort_entry ***sort_entries) | |
1063 | { | |
1064 | int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, | |
1065 | const struct tracing_map_sort_entry **); | |
1066 | struct tracing_map_sort_entry *sort_entry, **entries; | |
1067 | int i, n_entries, ret; | |
1068 | ||
42bc47b3 | 1069 | entries = vmalloc(array_size(sizeof(sort_entry), map->max_elts)); |
08d43a5f TZ |
1070 | if (!entries) |
1071 | return -ENOMEM; | |
1072 | ||
1073 | for (i = 0, n_entries = 0; i < map->map_size; i++) { | |
1074 | struct tracing_map_entry *entry; | |
1075 | ||
1076 | entry = TRACING_MAP_ENTRY(map->map, i); | |
1077 | ||
1078 | if (!entry->key || !entry->val) | |
1079 | continue; | |
1080 | ||
1081 | entries[n_entries] = create_sort_entry(entry->val->key, | |
1082 | entry->val); | |
1083 | if (!entries[n_entries++]) { | |
1084 | ret = -ENOMEM; | |
1085 | goto free; | |
1086 | } | |
1087 | } | |
1088 | ||
1089 | if (n_entries == 0) { | |
1090 | ret = 0; | |
1091 | goto free; | |
1092 | } | |
1093 | ||
1094 | if (n_entries == 1) { | |
1095 | *sort_entries = entries; | |
1096 | return 1; | |
1097 | } | |
1098 | ||
c193707d | 1099 | detect_dups(entries, n_entries, map->key_size); |
08d43a5f TZ |
1100 | |
1101 | if (is_key(map, sort_keys[0].field_idx)) | |
1102 | cmp_entries_fn = cmp_entries_key; | |
1103 | else | |
1104 | cmp_entries_fn = cmp_entries_sum; | |
1105 | ||
1106 | set_sort_key(map, &sort_keys[0]); | |
1107 | ||
1108 | sort(entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
1109 | (int (*)(const void *, const void *))cmp_entries_fn, NULL); | |
1110 | ||
1111 | if (n_sort_keys > 1) | |
1112 | sort_secondary(map, | |
1113 | (const struct tracing_map_sort_entry **)entries, | |
1114 | n_entries, | |
1115 | &sort_keys[0], | |
1116 | &sort_keys[1]); | |
1117 | ||
1118 | *sort_entries = entries; | |
1119 | ||
1120 | return n_entries; | |
1121 | free: | |
1122 | tracing_map_destroy_sort_entries(entries, n_entries); | |
1123 | ||
1124 | return ret; | |
1125 | } |