/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <[email protected]>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table different hash-pointer
 *   pairs that have the same pointer value, but not the hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized with respect to writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket applies to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
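
/*
 * Example usage (an illustrative sketch, not compiled as part of this file;
 * the API is declared in "qemu/qht.h"). The object type, hash value and
 * comparison callback below are hypothetical; any callback matching
 * qht_lookup_func_t, i.e. one that is called as func(obj, userp) and returns
 * true on a match, will do:
 *
 *   struct my_obj {
 *       uint32_t key;
 *   };
 *
 *   static bool my_cmp(const void *obj, const void *userp)
 *   {
 *       const struct my_obj *a = obj;
 *       const struct my_obj *b = userp;
 *
 *       return a->key == b->key;
 *   }
 *
 *   static struct my_obj obj = { .key = 42 };
 *
 *   static void example(void)
 *   {
 *       uint32_t hash = obj.key;  // stand-in for a real hash function
 *       struct my_obj *found;
 *       struct qht ht;
 *
 *       qht_init(&ht, 1 << 16, QHT_MODE_AUTO_RESIZE);
 *       qht_insert(&ht, &obj, hash);
 *
 *       rcu_read_lock();
 *       found = qht_lookup(&ht, my_cmp, &obj, hash);
 *       rcu_read_unlock();
 *
 *       qht_remove(&ht, &obj, hash);
 *       qht_destroy(&ht);
 *   }
 *
 * Per the assumptions above, @obj must not be freed until an RCU grace
 * period has elapsed after qht_remove().
 */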
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"

//#define QHT_DEBUG

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64 bytes); systems with larger cache lines might suffer from
 * some false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else /* 64-bit */
#define QHT_BUCKET_ENTRIES 4
#endif
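
/*
 * Sanity arithmetic (assuming 4-byte QemuSpin and QemuSeqLock): on 64-bit
 * hosts a bucket takes 4 + 4 + 4 * sizeof(uint32_t) + 4 * sizeof(void *) +
 * sizeof(void *) = 64 bytes; on 32-bit hosts, 4 + 4 + 6 * 4 + 6 * 4 + 4 =
 * 60 bytes. Either way sizeof(struct qht_bucket) fits in QHT_BUCKET_ALIGN,
 * which the QEMU_BUILD_BUG_ON below verifies at compile time.
 */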

/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with atomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in atomic_read, since otherwise the compiler
 * might refetch the pointer.
 * atomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);

/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8
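
/*
 * For example, a map with 1024 head buckets gets a threshold of 128; an
 * upward resize is then triggered as soon as the 129th non-head bucket is
 * chained (see qht_map_needs_resize() and qht_grow_maybe() below).
 */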

static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    int i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}
#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{ }

static inline void qht_map_debug__all_locked(struct qht_map *map)
{ }
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}
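
/*
 * E.g. on a 64-bit host (QHT_BUCKET_ENTRIES == 4), sizing for 1000 elements
 * yields pow2ceil(250) == 256 head buckets, i.e. one entry per element plus
 * some slack. The bucket count is rounded up to a power of 2 so that
 * qht_map_to_bucket() below can mask the hash instead of taking a modulo.
 */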

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}

/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    qht_map_lock_buckets(map);
    qemu_mutex_unlock(&ht->lock);
    *pmap = map;
    return;
}

/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qemu_mutex_unlock(&ht->lock);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(struct qht_map *map)
{
    return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}

void qht_init(struct qht *ht, size_t n_elems, unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    atomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                goto done;
            }
            atomic_set(&b->hashes[i], 0);
            atomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
 done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}

static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, false);
}

static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, true);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;

    n_buckets = qht_elems_to_buckets(n_elems);

    qemu_mutex_lock(&ht->lock);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
    }
    qht_do_resize_and_reset(ht, new);
    qemu_mutex_unlock(&ht->lock);

    return !!new;
}

static inline
void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (atomic_read(&b->hashes[i]) == hash) {
                /* The pointer is dereferenced before seqlock_read_retry,
                 * so (unlike qht_insert__locked) we need to use
                 * atomic_rcu_read here.
                 */
                void *p = atomic_rcu_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = atomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}

void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
                 uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    unsigned int version;
    void *ret;

    map = atomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}
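
/*
 * Note that @userp is opaque: it is passed through to @func unchanged, so
 * callers can look up by a bare key rather than by a candidate object.
 * A hypothetical sketch (my_obj and my_hash are placeholders):
 *
 *   static bool my_key_cmp(const void *obj, const void *userp)
 *   {
 *       const struct my_obj *o = obj;
 *       const uint32_t *key = userp;
 *
 *       return o->key == *key;
 *   }
 *
 *   uint32_t key = 42;
 *   void *p = qht_lookup(&ht, my_key_cmp, &key, my_hash(key));
 */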

/* call with head->lock held */
static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
                               struct qht_bucket *head, void *p, uint32_t hash,
                               bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->pointers[i] == p)) {
                    return false;
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    atomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

 found:
    /* found an empty key: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        atomic_rcu_set(&prev->next, b);
    }
    /* smp_wmb() implicit in seqlock_write_begin.  */
    atomic_set(&b->hashes[i], hash);
    atomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return true;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qemu_mutex_trylock(&ht->lock)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_do_resize(ht, new);
    }
    qemu_mutex_unlock(&ht->lock);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    return ret;
}

static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    atomic_set(&to->hashes[i], from->hashes[j]);
    atomic_set(&to->pointers[i], from->pointers[j]);

    atomic_set(&from->hashes[j], 0);
    atomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @head, and swap it with @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        atomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}

/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_map *map, struct qht_bucket *head,
                        const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(map, b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b,
                                   qht_iter_func_t func, void *userp)
{
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            func(ht, b->pointers[i], b->hashes[i], userp);
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map,
                                            qht_iter_func_t func, void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(ht, &map->buckets[i], func, userp);
    }
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    struct qht_map *map;

    map = atomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */
    qht_map_iter__all_locked(ht, map, func, userp);
    qht_map_unlock_buckets(map);
}

static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp)
{
    struct qht_map *new = userp;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Atomically perform a resize and/or reset.
 * Call with ht->lock held.
 */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
{
    struct qht_map *old;

    old = ht->map;
    qht_map_lock_buckets(old);

    if (reset) {
        qht_map_reset__all_locked(old);
    }

    if (new == NULL) {
        qht_map_unlock_buckets(old);
        return;
    }

    g_assert_cmpuint(new->n_buckets, !=, old->n_buckets);
    qht_map_iter__all_locked(ht, old, qht_map_copy, new);
    qht_map_debug__all_locked(new);

    atomic_rcu_set(&ht->map, new);
    qht_map_unlock_buckets(old);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qemu_mutex_lock(&ht->lock);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;

        new = qht_map_create(n_buckets);
        qht_do_resize(ht, new);
        ret = true;
    }
    qemu_mutex_unlock(&ht->lock);

    return ret;
}

/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
{
    struct qht_map *map;
    int i;

    map = atomic_rcu_read(&ht->map);

    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);
    /* bail out if the qht has not yet been initialized */
    if (unlikely(map == NULL)) {
        stats->head_buckets = 0;
        return;
    }
    stats->head_buckets = map->n_buckets;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *head = &map->buckets[i];
        struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (atomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = atomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}
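
/*
 * A sketch of how a caller might consume the stats (the fprintf report is
 * illustrative, not part of the API):
 *
 *   struct qht_stats stats;
 *
 *   qht_statistics_init(&ht, &stats);
 *   fprintf(stderr, "entries: %zu, used head buckets: %zu/%zu\n",
 *           stats.entries, stats.used_head_buckets, stats.head_buckets);
 *   qht_statistics_destroy(&stats);
 */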