/*
 * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads.
 *
 * Copyright (C) 2016, Emilio G. Cota <[email protected]>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * Assumptions:
 * - NULL cannot be inserted/removed as a pointer value.
 * - Trying to insert an already-existing hash-pointer pair is OK. However,
 *   it is not OK to insert into the same hash table different hash-pointer
 *   pairs that share the same pointer value but have different hashes.
 * - Lookups are performed under an RCU read-critical section; removals
 *   must wait for a grace period to elapse before freeing removed objects.
 *
 * Features:
 * - Reads (i.e. lookups and iterators) can be concurrent with other reads.
 *   Lookups that are concurrent with writes to the same bucket will retry
 *   via a seqlock; iterators acquire all bucket locks and therefore can be
 *   concurrent with lookups and are serialized wrt writers.
 * - Writes (i.e. insertions/removals) can be concurrent with writes to
 *   different buckets; writes to the same bucket are serialized through a lock.
 * - Optional auto-resizing: the hash table resizes up if the load surpasses
 *   a certain threshold. Resizing is done concurrently with readers; writes
 *   are serialized with the resize operation.
 *
 * The key structure is the bucket, which is cacheline-sized. Buckets
 * contain a few hash values and pointers; the u32 hash values are stored in
 * full so that resizing is fast. Having this structure instead of directly
 * chaining items has two advantages:
 * - Failed lookups fail fast, and touch a minimum number of cache lines.
 * - Resizing the hash table with concurrent lookups is easy.
 *
 * There are two types of buckets:
 * 1. "head" buckets are the ones allocated in the array of buckets in qht_map.
 * 2. all "non-head" buckets (i.e. all others) are members of a chain that
 *    starts from a head bucket.
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
 * chained to it; these two fields are unused in non-head buckets.
 *
 * On removals, we move the last valid item in the chain to the position of the
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * invalid entry is found, the (failed) lookup is over.
 *
 * Resizing is done by taking all bucket spinlocks (so that no other writers can
 * race with us) and then copying all entries into a new hash map. Then, the
 * ht->map pointer is set, and the old map is freed once no RCU readers can see
 * it anymore.
 *
 * Writers check for concurrent resizes by comparing ht->map before and after
 * acquiring their bucket lock. If they don't match, a resize has occurred
 * while the bucket spinlock was being acquired.
 *
 * Related Work:
 * - Idea of cacheline-sized buckets with full hashes taken from:
 *   David, Guerraoui & Trigonakis, "Asynchronized Concurrency:
 *   The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15.
 * - Why not RCU-based hash tables? They would allow us to get rid of the
 *   seqlock, but resizing would take forever since RCU read critical
 *   sections in QEMU take quite a long time.
 *   More info on relativistic hash tables:
 *   + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash
 *     Tables via Relativistic Programming", USENIX ATC'11.
 *   + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014.
 *     https://lwn.net/Articles/612021/
 */
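
/*
 * Example usage (an illustrative sketch only, not part of this file:
 * "struct my_entry", "my_cmp" and "my_hash" are hypothetical names, and
 * error handling is omitted):
 *
 *     static bool my_cmp(const void *a, const void *b)
 *     {
 *         const struct my_entry *ea = a;
 *         const struct my_entry *eb = b;
 *
 *         return ea->key == eb->key;
 *     }
 *
 *     struct qht ht;
 *     struct my_entry *e;
 *     uint32_t hash;
 *
 *     qht_init(&ht, my_cmp, 1 << 10, QHT_MODE_AUTO_RESIZE);
 *
 *     e = g_new0(struct my_entry, 1);
 *     e->key = 42;
 *     hash = my_hash(e->key);
 *     qht_insert(&ht, e, hash, NULL);
 *
 *     (lookups run under an RCU read-critical section)
 *     rcu_read_lock();
 *     e = qht_lookup(&ht, &(struct my_entry){ .key = 42 }, hash);
 *     rcu_read_unlock();
 *
 *     (after removal, wait for a grace period before freeing e)
 *     qht_remove(&ht, e, hash);
 */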
#include "qemu/osdep.h"
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "qemu/memalign.h"

//#define QHT_DEBUG

/*
 * We want to avoid false sharing of cache lines. Most systems have 64-byte
 * cache lines so we go with it for simplicity.
 *
 * Note that systems with smaller cache lines will be fine (the struct is
 * almost 64 bytes); systems with larger cache lines might suffer from
 * some false sharing.
 */
#define QHT_BUCKET_ALIGN 64

/* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */
#if HOST_LONG_BITS == 32
#define QHT_BUCKET_ENTRIES 6
#else /* 64-bit */
#define QHT_BUCKET_ENTRIES 4
#endif

enum qht_iter_type {
    QHT_ITER_VOID,    /* do nothing; use retvoid */
    QHT_ITER_RM,      /* remove element if retbool returns true */
};

struct qht_iter {
    union {
        qht_iter_func_t retvoid;
        qht_iter_bool_func_t retbool;
    } f;
    enum qht_iter_type type;
};

/*
 * Do _not_ use qemu_mutex_[try]lock directly! Use these helpers, otherwise
 * the profiler (QSP) will deadlock.
 */
static inline void qht_lock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        qemu_mutex_lock__raw(&ht->lock);
    } else {
        qemu_mutex_lock(&ht->lock);
    }
}

static inline int qht_trylock(struct qht *ht)
{
    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
        return qemu_mutex_trylock__raw(&ht->lock);
    }
    return qemu_mutex_trylock(&ht->lock);
}

/* this inline is not really necessary, but it helps keep code consistent */
static inline void qht_unlock(struct qht *ht)
{
    qemu_mutex_unlock(&ht->lock);
}

/*
 * Note: reading partially-updated pointers in @pointers could lead to
 * segfaults. We thus access them with qatomic_read/set; this guarantees
 * that the compiler makes all those accesses atomic. We also need the
 * volatile-like behavior in qatomic_read, since otherwise the compiler
 * might refetch the pointer.
 * qatomic_read's are of course not necessary when the bucket lock is held.
 *
 * If both ht->lock and b->lock are grabbed, ht->lock should always
 * be grabbed first.
 */
struct qht_bucket {
    QemuSpin lock;
    QemuSeqLock sequence;
    uint32_t hashes[QHT_BUCKET_ENTRIES];
    void *pointers[QHT_BUCKET_ENTRIES];
    struct qht_bucket *next;
} QEMU_ALIGNED(QHT_BUCKET_ALIGN);

QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
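
/*
 * A back-of-the-envelope check of the entry counts chosen above (a sketch
 * assuming QemuSpin and QemuSeqLock are each a single 32-bit word, as on
 * common hosts):
 *
 *   64-bit: 4 (lock) + 4 (sequence) + 4 * 4 (hashes) + 4 * 8 (pointers)
 *           + 8 (next) = 64 bytes
 *   32-bit: 4 (lock) + 4 (sequence) + 6 * 4 (hashes) + 6 * 4 (pointers)
 *           + 4 (next) = 60 bytes
 *
 * Either way the contents fit within QHT_BUCKET_ALIGN, which the
 * QEMU_BUILD_BUG_ON above enforces at compile time.
 */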

/**
 * struct qht_map - structure to track an array of buckets
 * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
 *       find the whole struct.
 * @buckets: array of head buckets. It is constant once the map is created.
 * @n_buckets: number of head buckets. It is constant once the map is created.
 * @n_added_buckets: number of added (i.e. "non-head") buckets
 * @n_added_buckets_threshold: threshold to trigger an upward resize once the
 *                             number of added buckets surpasses it.
 *
 * Buckets are tracked in what we call a "map", i.e. this structure.
 */
struct qht_map {
    struct rcu_head rcu;
    struct qht_bucket *buckets;
    size_t n_buckets;
    size_t n_added_buckets;
    size_t n_added_buckets_threshold;
};

/* trigger a resize when n_added_buckets > n_buckets / div */
#define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8

static void qht_do_resize_reset(struct qht *ht, struct qht_map *new,
                                bool reset);
static void qht_grow_maybe(struct qht *ht);

#ifdef QHT_DEBUG

#define qht_debug_assert(X) do { assert(X); } while (0)

static void qht_bucket_debug__locked(struct qht_bucket *b)
{
    bool seen_empty = false;
    bool corrupt = false;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                seen_empty = true;
                continue;
            }
            if (seen_empty) {
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
                corrupt = true;
            }
        }
        b = b->next;
    } while (b);
    qht_debug_assert(!corrupt);
}

static void qht_map_debug__all_locked(struct qht_map *map)
{
    int i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_debug__locked(&map->buckets[i]);
    }
}
#else

#define qht_debug_assert(X) do { (void)(X); } while (0)

static inline void qht_bucket_debug__locked(struct qht_bucket *b)
{ }

static inline void qht_map_debug__all_locked(struct qht_map *map)
{ }
#endif /* QHT_DEBUG */

static inline size_t qht_elems_to_buckets(size_t n_elems)
{
    return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
}

static inline void qht_head_init(struct qht_bucket *b)
{
    memset(b, 0, sizeof(*b));
    qemu_spin_init(&b->lock);
    seqlock_init(&b->sequence);
}

static inline
struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash)
{
    return &map->buckets[hash & (map->n_buckets - 1)];
}

/* acquire all bucket locks from a map */
static void qht_map_lock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_lock(&b->lock);
    }
}

static void qht_map_unlock_buckets(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        struct qht_bucket *b = &map->buckets[i];

        qemu_spin_unlock(&b->lock);
    }
}

/*
 * Call with at least a bucket lock held.
 * @map should be the value read before acquiring the lock (or locks).
 */
static inline bool qht_map_is_stale__locked(const struct qht *ht,
                                            const struct qht_map *map)
{
    return map != ht->map;
}

/*
 * Grab all bucket locks, and set @pmap after making sure the map isn't stale.
 *
 * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference.
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
{
    struct qht_map *map;

    map = qatomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return;
    }
    qht_map_unlock_buckets(map);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    qht_map_lock_buckets(map);
    qht_unlock(ht);
    *pmap = map;
}

/*
 * Get a head bucket and lock it, making sure its parent map is not stale.
 * @pmap is filled with a pointer to the bucket's parent map.
 *
 * Unlock with qemu_spin_unlock(&b->lock).
 *
 * Note: callers cannot have ht->lock held.
 */
static inline
struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
                                             struct qht_map **pmap)
{
    struct qht_bucket *b;
    struct qht_map *map;

    map = qatomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    qemu_spin_lock(&b->lock);
    if (likely(!qht_map_is_stale__locked(ht, map))) {
        *pmap = map;
        return b;
    }
    qemu_spin_unlock(&b->lock);

    /* we raced with a resize; acquire ht->lock to see the updated ht->map */
    qht_lock(ht);
    map = ht->map;
    b = qht_map_to_bucket(map, hash);
    qemu_spin_lock(&b->lock);
    qht_unlock(ht);
    *pmap = map;
    return b;
}

static inline bool qht_map_needs_resize(const struct qht_map *map)
{
    return qatomic_read(&map->n_added_buckets) >
           map->n_added_buckets_threshold;
}

static inline void qht_chain_destroy(const struct qht_bucket *head)
{
    struct qht_bucket *curr = head->next;
    struct qht_bucket *prev;

    qemu_spin_destroy(&head->lock);
    while (curr) {
        prev = curr;
        curr = curr->next;
        qemu_vfree(prev);
    }
}

/* pass only an orphan map */
static void qht_map_destroy(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_chain_destroy(&map->buckets[i]);
    }
    qemu_vfree(map->buckets);
    g_free(map);
}

static struct qht_map *qht_map_create(size_t n_buckets)
{
    struct qht_map *map;
    size_t i;

    map = g_malloc(sizeof(*map));
    map->n_buckets = n_buckets;

    map->n_added_buckets = 0;
    map->n_added_buckets_threshold = n_buckets /
        QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV;

    /* let tiny hash tables add at least one non-head bucket */
    if (unlikely(map->n_added_buckets_threshold == 0)) {
        map->n_added_buckets_threshold = 1;
    }

    map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                 sizeof(*map->buckets) * n_buckets);
    for (i = 0; i < n_buckets; i++) {
        qht_head_init(&map->buckets[i]);
    }
    return map;
}

void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
              unsigned int mode)
{
    struct qht_map *map;
    size_t n_buckets = qht_elems_to_buckets(n_elems);

    g_assert(cmp);
    ht->cmp = cmp;
    ht->mode = mode;
    qemu_mutex_init(&ht->lock);
    map = qht_map_create(n_buckets);
    qatomic_rcu_set(&ht->map, map);
}

/* call only when there are no readers/writers left */
void qht_destroy(struct qht *ht)
{
    qht_map_destroy(ht->map);
    memset(ht, 0, sizeof(*ht));
}

static void qht_bucket_reset__locked(struct qht_bucket *head)
{
    struct qht_bucket *b = head;
    int i;

    seqlock_write_begin(&head->sequence);
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                goto done;
            }
            qatomic_set(&b->hashes[i], 0);
            qatomic_set(&b->pointers[i], NULL);
        }
        b = b->next;
    } while (b);
done:
    seqlock_write_end(&head->sequence);
}

/* call with all bucket locks held */
static void qht_map_reset__all_locked(struct qht_map *map)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_reset__locked(&map->buckets[i]);
    }
    qht_map_debug__all_locked(map);
}

void qht_reset(struct qht *ht)
{
    struct qht_map *map;

    qht_map_lock_buckets__no_stale(ht, &map);
    qht_map_reset__all_locked(map);
    qht_map_unlock_buckets(map);
}

static inline void qht_do_resize(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, false);
}

static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new)
{
    qht_do_resize_reset(ht, new, true);
}

bool qht_reset_size(struct qht *ht, size_t n_elems)
{
    struct qht_map *new = NULL;
    struct qht_map *map;
    size_t n_buckets;

    n_buckets = qht_elems_to_buckets(n_elems);

    qht_lock(ht);
    map = ht->map;
    if (n_buckets != map->n_buckets) {
        new = qht_map_create(n_buckets);
    }
    qht_do_resize_and_reset(ht, new);
    qht_unlock(ht);

    return !!new;
}

static inline
void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func,
                    const void *userp, uint32_t hash)
{
    const struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (qatomic_read(&b->hashes[i]) == hash) {
                /*
                 * The pointer is dereferenced before seqlock_read_retry,
                 * so (unlike qht_insert__locked) we need to use
                 * qatomic_rcu_read here.
                 */
                void *p = qatomic_rcu_read(&b->pointers[i]);

                if (likely(p) && likely(func(p, userp))) {
                    return p;
                }
            }
        }
        b = qatomic_rcu_read(&b->next);
    } while (b);

    return NULL;
}

static __attribute__((noinline))
void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func,
                           const void *userp, uint32_t hash)
{
    unsigned int version;
    void *ret;

    do {
        version = seqlock_read_begin(&b->sequence);
        ret = qht_do_lookup(b, func, userp, hash);
    } while (seqlock_read_retry(&b->sequence, version));
    return ret;
}

void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
                        qht_lookup_func_t func)
{
    const struct qht_bucket *b;
    const struct qht_map *map;
    unsigned int version;
    void *ret;

    map = qatomic_rcu_read(&ht->map);
    b = qht_map_to_bucket(map, hash);

    version = seqlock_read_begin(&b->sequence);
    ret = qht_do_lookup(b, func, userp, hash);
    if (likely(!seqlock_read_retry(&b->sequence, version))) {
        return ret;
    }
    /*
     * Removing the do/while from the fastpath gives a 4% perf. increase when
     * running a 100%-lookup microbenchmark.
     */
    return qht_lookup__slowpath(b, func, userp, hash);
}

void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash)
{
    return qht_lookup_custom(ht, userp, hash, ht->cmp);
}

/*
 * Call with head->lock held.
 * @ht is const since it is only used for ht->cmp().
 */
static void *qht_insert__locked(const struct qht *ht, struct qht_map *map,
                                struct qht_bucket *head, void *p, uint32_t hash,
                                bool *needs_resize)
{
    struct qht_bucket *b = head;
    struct qht_bucket *prev = NULL;
    struct qht_bucket *new = NULL;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                if (unlikely(b->hashes[i] == hash &&
                             ht->cmp(b->pointers[i], p))) {
                    return b->pointers[i];
                }
            } else {
                goto found;
            }
        }
        prev = b;
        b = b->next;
    } while (b);

    b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b));
    memset(b, 0, sizeof(*b));
    new = b;
    i = 0;
    qatomic_inc(&map->n_added_buckets);
    if (unlikely(qht_map_needs_resize(map)) && needs_resize) {
        *needs_resize = true;
    }

found:
    /* found an empty slot: acquire the seqlock and write */
    seqlock_write_begin(&head->sequence);
    if (new) {
        qatomic_rcu_set(&prev->next, b);
    }
    /* smp_wmb() implicit in seqlock_write_begin. */
    qatomic_set(&b->hashes[i], hash);
    qatomic_set(&b->pointers[i], p);
    seqlock_write_end(&head->sequence);
    return NULL;
}

static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
{
    struct qht_map *map;

    /*
     * If the lock is taken it probably means there's an ongoing resize,
     * so bail out.
     */
    if (qht_trylock(ht)) {
        return;
    }
    map = ht->map;
    /* another thread might have just performed the resize we were after */
    if (qht_map_needs_resize(map)) {
        struct qht_map *new = qht_map_create(map->n_buckets * 2);

        qht_do_resize(ht, new);
    }
    qht_unlock(ht);
}

bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool needs_resize = false;
    void *prev;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);

    if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
        qht_grow_maybe(ht);
    }
    if (likely(prev == NULL)) {
        return true;
    }
    if (existing) {
        *existing = prev;
    }
    return false;
}

static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos)
{
    if (pos == QHT_BUCKET_ENTRIES - 1) {
        if (b->next == NULL) {
            return true;
        }
        return b->next->pointers[0] == NULL;
    }
    return b->pointers[pos + 1] == NULL;
}

static void
qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j)
{
    qht_debug_assert(!(to == from && i == j));
    qht_debug_assert(to->pointers[i]);
    qht_debug_assert(from->pointers[j]);

    qatomic_set(&to->hashes[i], from->hashes[j]);
    qatomic_set(&to->pointers[i], from->pointers[j]);

    qatomic_set(&from->hashes[j], 0);
    qatomic_set(&from->pointers[j], NULL);
}

/*
 * Find the last valid entry in @orig, and swap it with @orig[pos], which has
 * just been invalidated.
 */
static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
{
    struct qht_bucket *b = orig;
    struct qht_bucket *prev = NULL;
    int i;

    if (qht_entry_is_last(orig, pos)) {
        orig->hashes[pos] = 0;
        qatomic_set(&orig->pointers[pos], NULL);
        return;
    }
    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i]) {
                continue;
            }
            if (i > 0) {
                return qht_entry_move(orig, pos, b, i - 1);
            }
            qht_debug_assert(prev);
            return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
        }
        prev = b;
        b = b->next;
    } while (b);
    /* no free entries other than orig[pos], so swap it with the last one */
    qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1);
}

/* call with b->lock held */
static inline
bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            void *q = b->pointers[i];

            if (unlikely(q == NULL)) {
                return false;
            }
            if (q == p) {
                qht_debug_assert(b->hashes[i] == hash);
                seqlock_write_begin(&head->sequence);
                qht_bucket_remove_entry(b, i);
                seqlock_write_end(&head->sequence);
                return true;
            }
        }
        b = b->next;
    } while (b);
    return false;
}

bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
{
    struct qht_bucket *b;
    struct qht_map *map;
    bool ret;

    /* NULL pointers are not supported */
    qht_debug_assert(p);

    b = qht_bucket_lock__no_stale(ht, hash, &map);
    ret = qht_remove__locked(b, p, hash);
    qht_bucket_debug__locked(b);
    qemu_spin_unlock(&b->lock);
    return ret;
}

static inline void qht_bucket_iter(struct qht_bucket *head,
                                   const struct qht_iter *iter, void *userp)
{
    struct qht_bucket *b = head;
    int i;

    do {
        for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
            if (b->pointers[i] == NULL) {
                return;
            }
            switch (iter->type) {
            case QHT_ITER_VOID:
                iter->f.retvoid(b->pointers[i], b->hashes[i], userp);
                break;
            case QHT_ITER_RM:
                if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) {
                    /* replace i with the last valid element in the bucket */
                    seqlock_write_begin(&head->sequence);
                    qht_bucket_remove_entry(b, i);
                    seqlock_write_end(&head->sequence);
                    qht_bucket_debug__locked(b);
                    /* reevaluate i, since it just got replaced */
                    i--;
                    continue;
                }
                break;
            default:
                g_assert_not_reached();
            }
        }
        b = b->next;
    } while (b);
}

/* call with all of the map's locks held */
static inline void qht_map_iter__all_locked(struct qht_map *map,
                                            const struct qht_iter *iter,
                                            void *userp)
{
    size_t i;

    for (i = 0; i < map->n_buckets; i++) {
        qht_bucket_iter(&map->buckets[i], iter, userp);
    }
}

static inline void
do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp)
{
    struct qht_map *map;

    map = qatomic_rcu_read(&ht->map);
    qht_map_lock_buckets(map);
    qht_map_iter__all_locked(map, iter, userp);
    qht_map_unlock_buckets(map);
}

void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retvoid = func,
        .type = QHT_ITER_VOID,
    };

    do_qht_iter(ht, &iter, userp);
}

void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp)
{
    const struct qht_iter iter = {
        .f.retbool = func,
        .type = QHT_ITER_RM,
    };

    do_qht_iter(ht, &iter, userp);
}
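
/*
 * Example iterator usage (an illustrative sketch; "count_entry" and its
 * counter argument are hypothetical):
 *
 *     static void count_entry(void *p, uint32_t hash, void *userp)
 *     {
 *         size_t *count = userp;
 *
 *         (*count)++;
 *     }
 *
 *     size_t count = 0;
 *     qht_iter(&ht, count_entry, &count);
 *
 * qht_iter_remove() is analogous, except that its callback returns true
 * for entries that should be removed.
 */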

struct qht_map_copy_data {
    struct qht *ht;
    struct qht_map *new;
};

static void qht_map_copy(void *p, uint32_t hash, void *userp)
{
    struct qht_map_copy_data *data = userp;
    struct qht *ht = data->ht;
    struct qht_map *new = data->new;
    struct qht_bucket *b = qht_map_to_bucket(new, hash);

    /* no need to acquire b->lock because no thread has seen this map yet */
    qht_insert__locked(ht, new, b, p, hash, NULL);
}

/*
 * Atomically perform a resize and/or reset.
 * Call with ht->lock held.
 */
static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset)
{
    struct qht_map *old;
    const struct qht_iter iter = {
        .f.retvoid = qht_map_copy,
        .type = QHT_ITER_VOID,
    };
    struct qht_map_copy_data data;

    old = ht->map;
    qht_map_lock_buckets(old);

    if (reset) {
        qht_map_reset__all_locked(old);
    }

    if (new == NULL) {
        qht_map_unlock_buckets(old);
        return;
    }

    g_assert(new->n_buckets != old->n_buckets);
    data.ht = ht;
    data.new = new;
    qht_map_iter__all_locked(old, &iter, &data);
    qht_map_debug__all_locked(new);

    qatomic_rcu_set(&ht->map, new);
    qht_map_unlock_buckets(old);
    call_rcu(old, qht_map_destroy, rcu);
}

bool qht_resize(struct qht *ht, size_t n_elems)
{
    size_t n_buckets = qht_elems_to_buckets(n_elems);
    bool ret = false;

    qht_lock(ht);
    if (n_buckets != ht->map->n_buckets) {
        struct qht_map *new;

        new = qht_map_create(n_buckets);
        qht_do_resize(ht, new);
        ret = true;
    }
    qht_unlock(ht);

    return ret;
}

/* pass @stats to qht_statistics_destroy() when done */
void qht_statistics_init(const struct qht *ht, struct qht_stats *stats)
{
    const struct qht_map *map;
    int i;

    map = qatomic_rcu_read(&ht->map);

    stats->used_head_buckets = 0;
    stats->entries = 0;
    qdist_init(&stats->chain);
    qdist_init(&stats->occupancy);
    /* bail out if the qht has not yet been initialized */
    if (unlikely(map == NULL)) {
        stats->head_buckets = 0;
        return;
    }
    stats->head_buckets = map->n_buckets;

    for (i = 0; i < map->n_buckets; i++) {
        const struct qht_bucket *head = &map->buckets[i];
        const struct qht_bucket *b;
        unsigned int version;
        size_t buckets;
        size_t entries;
        int j;

        do {
            version = seqlock_read_begin(&head->sequence);
            buckets = 0;
            entries = 0;
            b = head;
            do {
                for (j = 0; j < QHT_BUCKET_ENTRIES; j++) {
                    if (qatomic_read(&b->pointers[j]) == NULL) {
                        break;
                    }
                    entries++;
                }
                buckets++;
                b = qatomic_rcu_read(&b->next);
            } while (b);
        } while (seqlock_read_retry(&head->sequence, version));

        if (entries) {
            qdist_inc(&stats->chain, buckets);
            qdist_inc(&stats->occupancy,
                      (double)entries / QHT_BUCKET_ENTRIES / buckets);
            stats->used_head_buckets++;
            stats->entries += entries;
        } else {
            qdist_inc(&stats->occupancy, 0);
        }
    }
}

void qht_statistics_destroy(struct qht_stats *stats)
{
    qdist_destroy(&stats->occupancy);
    qdist_destroy(&stats->chain);
}