]>
Commit | Line | Data |
---|---|---|
515864a0 EC |
1 | /* |
2 | * Copyright (C) 2016, Emilio G. Cota <[email protected]> | |
3 | * | |
4 | * License: GNU GPL, version 2 or later. | |
5 | * See the COPYING file in the top-level directory. | |
6 | */ | |
7 | #include "qemu/osdep.h" | |
515864a0 EC |
8 | #include "qemu/processor.h" |
9 | #include "qemu/atomic.h" | |
10 | #include "qemu/qht.h" | |
11 | #include "qemu/rcu.h" | |
fe656e31 | 12 | #include "qemu/xxhash.h" |
515864a0 EC |
13 | |
/*
 * Per-thread operation counters. Every kind of operation has a counter for
 * attempts that took effect and a "not_" counter for attempts that did not.
 */
struct thread_stats {
    size_t rd;      /* lookups that found the key */
    size_t not_rd;  /* lookups that did not find the key */
    size_t in;      /* insertions that added the key */
    size_t not_in;  /* insertion attempts that added nothing */
    size_t rm;      /* removals that removed the key */
    size_t not_rm;  /* removal attempts that removed nothing */
    size_t rz;      /* resize requests that resized the table */
    size_t not_rz;  /* resize requests that did not resize the table */
};
24 | ||
25 | struct thread_info { | |
26 | void (*func)(struct thread_info *); | |
27 | struct thread_stats stats; | |
28 | uint64_t r; | |
29 | bool write_op; /* writes alternate between insertions and removals */ | |
30 | bool resize_down; | |
31 | } QEMU_ALIGNED(64); /* avoid false sharing among threads */ | |
32 | ||
33 | static struct qht ht; | |
34 | static QemuThread *rw_threads; | |
35 | ||
36 | #define DEFAULT_RANGE (4096) | |
37 | #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE | |
38 | ||
39 | static unsigned int duration = 1; | |
40 | static unsigned int n_rw_threads = 1; | |
41 | static unsigned long lookup_range = DEFAULT_RANGE; | |
42 | static unsigned long update_range = DEFAULT_RANGE; | |
43 | static size_t init_range = DEFAULT_RANGE; | |
44 | static size_t init_size = DEFAULT_RANGE; | |
45 | static size_t n_ready_threads; | |
46 | static long populate_offset; | |
47 | static long *keys; | |
48 | ||
49 | static size_t resize_min; | |
50 | static size_t resize_max; | |
51 | static struct thread_info *rz_info; | |
52 | static unsigned long resize_delay = 1000; | |
53 | static double resize_rate; /* 0.0 to 1.0 */ | |
54 | static unsigned int n_rz_threads = 1; | |
55 | static QemuThread *rz_threads; | |
bd224fce | 56 | static bool precompute_hash; |
515864a0 EC |
57 | |
58 | static double update_rate; /* 0.0 to 1.0 */ | |
59 | static uint64_t update_threshold; | |
60 | static uint64_t resize_threshold; | |
61 | ||
62 | static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; | |
63 | static int qht_mode; | |
64 | ||
65 | static bool test_start; | |
66 | static bool test_stop; | |
67 | ||
68 | static struct thread_info *rw_info; | |
69 | ||
/* Option summary printed by usage_complete(); one line per getopt flag. */
static const char commands_string[] =
    " -d = duration, in seconds\n"
    " -n = number of threads\n"
    "\n"
    " -o = offset at which keys start\n"
    " -p = precompute hashes\n"
    "\n"
    " -g = set -s,-k,-K,-l,-r to the same value\n"
    " -s = initial size hint\n"
    " -k = initial number of keys\n"
    " -K = initial range of keys (will be rounded up to pow2)\n"
    " -l = lookup range of keys (will be rounded up to pow2)\n"
    " -r = update range of keys (will be rounded up to pow2)\n"
    "\n"
    " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n"
    "\n"
    " -R = enable auto-resize\n"
    " -S = resize rate (0.0 to 100.0)\n"
    " -D = delay (in us) between potential resizes\n"
    " -N = number of resize threads";
90 | ||
91 | static void usage_complete(int argc, char *argv[]) | |
92 | { | |
93 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); | |
94 | fprintf(stderr, "options:\n%s\n", commands_string); | |
95 | exit(-1); | |
96 | } | |
97 | ||
/*
 * Comparison callback handed to qht_init(). Both arguments point to keys of
 * type long; returns true when the two pointed-to values are the same.
 */
static bool is_equal(const void *ap, const void *bp)
{
    const long lhs = *(const long *)ap;
    const long rhs = *(const long *)bp;

    return lhs == rhs;
}
105 | ||
/* Hash @v with qemu_xxhash2(); this is the initial value of hfunc. */
static uint32_t h(unsigned long v)
{
    const uint32_t hash = qemu_xxhash2(v);

    return hash;
}
110 | ||
bd224fce EC |
/*
 * Identity "hash": return @v itself, converted to 32 bits. Selected as the
 * hash function when the keys already hold precomputed hashes (-p).
 */
static uint32_t hval(unsigned long v)
{
    return (uint32_t)v;
}
115 | ||
116 | static uint32_t (*hfunc)(unsigned long v) = h; | |
117 | ||
515864a0 EC |
/*
 * One step of the xorshift64* generator.
 * From: https://en.wikipedia.org/wiki/Xorshift
 *
 * Used instead of rand_r() because it is faster and yields a full 64-bit
 * range (the C standard only guarantees RAND_MAX >= 32767).
 *
 * Returns the value that follows @x in the sequence. Note that an input of 0
 * maps to 0, so seeds must be non-zero to be useful.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t s = x;

    s = s ^ (s >> 12); /* a */
    s = s ^ (s << 25); /* b */
    s = s ^ (s >> 27); /* c */
    return s * multiplier;
}
130 | ||
131 | static void do_rz(struct thread_info *info) | |
132 | { | |
133 | struct thread_stats *stats = &info->stats; | |
134 | ||
135 | if (info->r < resize_threshold) { | |
136 | size_t size = info->resize_down ? resize_min : resize_max; | |
137 | bool resized; | |
138 | ||
139 | resized = qht_resize(&ht, size); | |
140 | info->resize_down = !info->resize_down; | |
141 | ||
142 | if (resized) { | |
143 | stats->rz++; | |
144 | } else { | |
145 | stats->not_rz++; | |
146 | } | |
147 | } | |
148 | g_usleep(resize_delay); | |
149 | } | |
150 | ||
151 | static void do_rw(struct thread_info *info) | |
152 | { | |
153 | struct thread_stats *stats = &info->stats; | |
154 | uint32_t hash; | |
155 | long *p; | |
156 | ||
157 | if (info->r >= update_threshold) { | |
158 | bool read; | |
159 | ||
160 | p = &keys[info->r & (lookup_range - 1)]; | |
bd224fce | 161 | hash = hfunc(*p); |
61b8cef1 | 162 | read = qht_lookup(&ht, p, hash); |
515864a0 EC |
163 | if (read) { |
164 | stats->rd++; | |
165 | } else { | |
166 | stats->not_rd++; | |
167 | } | |
168 | } else { | |
169 | p = &keys[info->r & (update_range - 1)]; | |
bd224fce | 170 | hash = hfunc(*p); |
515864a0 EC |
171 | if (info->write_op) { |
172 | bool written = false; | |
173 | ||
61b8cef1 | 174 | if (qht_lookup(&ht, p, hash) == NULL) { |
32359d52 | 175 | written = qht_insert(&ht, p, hash, NULL); |
515864a0 EC |
176 | } |
177 | if (written) { | |
178 | stats->in++; | |
179 | } else { | |
180 | stats->not_in++; | |
181 | } | |
182 | } else { | |
183 | bool removed = false; | |
184 | ||
61b8cef1 | 185 | if (qht_lookup(&ht, p, hash)) { |
515864a0 EC |
186 | removed = qht_remove(&ht, p, hash); |
187 | } | |
188 | if (removed) { | |
189 | stats->rm++; | |
190 | } else { | |
191 | stats->not_rm++; | |
192 | } | |
193 | } | |
194 | info->write_op = !info->write_op; | |
195 | } | |
196 | } | |
197 | ||
198 | static void *thread_func(void *p) | |
199 | { | |
200 | struct thread_info *info = p; | |
201 | ||
202 | rcu_register_thread(); | |
203 | ||
204 | atomic_inc(&n_ready_threads); | |
977ec47d | 205 | while (!atomic_read(&test_start)) { |
515864a0 EC |
206 | cpu_relax(); |
207 | } | |
208 | ||
209 | rcu_read_lock(); | |
210 | while (!atomic_read(&test_stop)) { | |
211 | info->r = xorshift64star(info->r); | |
212 | info->func(info); | |
213 | } | |
214 | rcu_read_unlock(); | |
215 | ||
216 | rcu_unregister_thread(); | |
217 | return NULL; | |
218 | } | |
219 | ||
220 | /* sets everything except info->func */ | |
221 | static void prepare_thread_info(struct thread_info *info, int i) | |
222 | { | |
223 | /* seed for the RNG; each thread should have a different one */ | |
224 | info->r = (i + 1) ^ time(NULL); | |
225 | /* the first update will be a write */ | |
226 | info->write_op = true; | |
227 | /* the first resize will be down */ | |
228 | info->resize_down = true; | |
229 | ||
230 | memset(&info->stats, 0, sizeof(info->stats)); | |
231 | } | |
232 | ||
233 | static void | |
234 | th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, | |
235 | void (*func)(struct thread_info *), int offset, int n) | |
236 | { | |
237 | struct thread_info *info; | |
238 | QemuThread *th; | |
239 | int i; | |
240 | ||
241 | th = g_malloc(sizeof(*th) * n); | |
242 | *threads = th; | |
243 | ||
244 | info = qemu_memalign(64, sizeof(*info) * n); | |
245 | *infos = info; | |
246 | ||
247 | for (i = 0; i < n; i++) { | |
248 | prepare_thread_info(&info[i], offset + i); | |
249 | info[i].func = func; | |
250 | qemu_thread_create(&th[i], name, thread_func, &info[i], | |
251 | QEMU_THREAD_JOINABLE); | |
252 | } | |
253 | } | |
254 | ||
255 | static void create_threads(void) | |
256 | { | |
257 | th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); | |
258 | th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); | |
259 | } | |
260 | ||
261 | static void pr_params(void) | |
262 | { | |
263 | printf("Parameters:\n"); | |
264 | printf(" duration: %d s\n", duration); | |
265 | printf(" # of threads: %u\n", n_rw_threads); | |
266 | printf(" initial # of keys: %zu\n", init_size); | |
267 | printf(" initial size hint: %zu\n", qht_n_elems); | |
268 | printf(" auto-resize: %s\n", | |
269 | qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); | |
270 | if (resize_rate) { | |
271 | printf(" resize_rate: %f%%\n", resize_rate * 100.0); | |
272 | printf(" resize range: %zu-%zu\n", resize_min, resize_max); | |
273 | printf(" # resize threads %u\n", n_rz_threads); | |
274 | } | |
275 | printf(" update rate: %f%%\n", update_rate * 100.0); | |
276 | printf(" offset: %ld\n", populate_offset); | |
277 | printf(" initial key range: %zu\n", init_range); | |
278 | printf(" lookup range: %lu\n", lookup_range); | |
279 | printf(" update range: %lu\n", update_range); | |
280 | } | |
281 | ||
/*
 * Convert @rate, a probability in [0.0, 1.0], into a 64-bit threshold so that
 * a uniformly distributed uint64_t is below *@threshold with (approximately)
 * that probability.
 *
 * Out-of-range input is clamped: rates that are negative or NaN give 0, rates
 * of 1.0 or more give UINT64_MAX. Clamping matters because converting a
 * negative or too-large double to uint64_t is undefined behaviour.
 */
static void do_threshold(double rate, uint64_t *threshold)
{
    if (!(rate > 0.0)) {
        /* zero, negative, or NaN */
        *threshold = 0;
    } else if (rate >= 1.0) {
        *threshold = UINT64_MAX;
    } else {
        /* 0 < rate < 1, so the product is below 2^64 and fits */
        *threshold = rate * (double)UINT64_MAX;
    }
}
290 | ||
291 | static void htable_init(void) | |
292 | { | |
293 | unsigned long n = MAX(init_range, update_range); | |
294 | uint64_t r = time(NULL); | |
295 | size_t retries = 0; | |
296 | size_t i; | |
297 | ||
298 | /* avoid allocating memory later by allocating all the keys now */ | |
299 | keys = g_malloc(sizeof(*keys) * n); | |
300 | for (i = 0; i < n; i++) { | |
bd224fce EC |
301 | long val = populate_offset + i; |
302 | ||
303 | keys[i] = precompute_hash ? h(val) : hval(val); | |
515864a0 EC |
304 | } |
305 | ||
306 | /* some sanity checks */ | |
307 | g_assert_cmpuint(lookup_range, <=, n); | |
308 | ||
309 | /* compute thresholds */ | |
310 | do_threshold(update_rate, &update_threshold); | |
311 | do_threshold(resize_rate, &resize_threshold); | |
312 | ||
313 | if (resize_rate) { | |
314 | resize_min = n / 2; | |
315 | resize_max = n; | |
316 | assert(resize_min < resize_max); | |
317 | } else { | |
318 | n_rz_threads = 0; | |
319 | } | |
320 | ||
321 | /* initialize the hash table */ | |
61b8cef1 | 322 | qht_init(&ht, is_equal, qht_n_elems, qht_mode); |
515864a0 EC |
323 | assert(init_size <= init_range); |
324 | ||
325 | pr_params(); | |
326 | ||
327 | fprintf(stderr, "Initialization: populating %zu items...", init_size); | |
328 | for (i = 0; i < init_size; i++) { | |
329 | for (;;) { | |
330 | uint32_t hash; | |
331 | long *p; | |
332 | ||
333 | r = xorshift64star(r); | |
334 | p = &keys[r & (init_range - 1)]; | |
bd224fce | 335 | hash = hfunc(*p); |
32359d52 | 336 | if (qht_insert(&ht, p, hash, NULL)) { |
515864a0 EC |
337 | break; |
338 | } | |
339 | retries++; | |
340 | } | |
341 | } | |
342 | fprintf(stderr, " populated after %zu retries\n", retries); | |
343 | } | |
344 | ||
345 | static void add_stats(struct thread_stats *s, struct thread_info *info, int n) | |
346 | { | |
347 | int i; | |
348 | ||
349 | for (i = 0; i < n; i++) { | |
350 | struct thread_stats *stats = &info[i].stats; | |
351 | ||
352 | s->rd += stats->rd; | |
353 | s->not_rd += stats->not_rd; | |
354 | ||
355 | s->in += stats->in; | |
356 | s->not_in += stats->not_in; | |
357 | ||
358 | s->rm += stats->rm; | |
359 | s->not_rm += stats->not_rm; | |
360 | ||
361 | s->rz += stats->rz; | |
362 | s->not_rz += stats->not_rz; | |
363 | } | |
364 | } | |
365 | ||
366 | static void pr_stats(void) | |
367 | { | |
368 | struct thread_stats s = {}; | |
369 | double tx; | |
370 | ||
371 | add_stats(&s, rw_info, n_rw_threads); | |
372 | add_stats(&s, rz_info, n_rz_threads); | |
373 | ||
374 | printf("Results:\n"); | |
375 | ||
376 | if (resize_rate) { | |
377 | printf(" Resizes: %zu (%.2f%% of %zu)\n", | |
378 | s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); | |
379 | } | |
380 | ||
381 | printf(" Read: %.2f M (%.2f%% of %.2fM)\n", | |
382 | (double)s.rd / 1e6, | |
383 | (double)s.rd / (s.rd + s.not_rd) * 100, | |
384 | (double)(s.rd + s.not_rd) / 1e6); | |
385 | printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", | |
386 | (double)s.in / 1e6, | |
387 | (double)s.in / (s.in + s.not_in) * 100, | |
388 | (double)(s.in + s.not_in) / 1e6); | |
389 | printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", | |
390 | (double)s.rm / 1e6, | |
391 | (double)s.rm / (s.rm + s.not_rm) * 100, | |
392 | (double)(s.rm + s.not_rm) / 1e6); | |
393 | ||
394 | tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; | |
395 | printf(" Throughput: %.2f MT/s\n", tx); | |
396 | printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); | |
397 | } | |
398 | ||
399 | static void run_test(void) | |
400 | { | |
515864a0 EC |
401 | int i; |
402 | ||
403 | while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { | |
404 | cpu_relax(); | |
405 | } | |
eb4f8e10 | 406 | |
977ec47d | 407 | atomic_set(&test_start, true); |
eb4f8e10 | 408 | g_usleep(duration * G_USEC_PER_SEC); |
977ec47d | 409 | atomic_set(&test_stop, true); |
515864a0 EC |
410 | |
411 | for (i = 0; i < n_rw_threads; i++) { | |
412 | qemu_thread_join(&rw_threads[i]); | |
413 | } | |
414 | for (i = 0; i < n_rz_threads; i++) { | |
415 | qemu_thread_join(&rz_threads[i]); | |
416 | } | |
417 | } | |
418 | ||
419 | static void parse_args(int argc, char *argv[]) | |
420 | { | |
421 | int c; | |
422 | ||
423 | for (;;) { | |
bd224fce | 424 | c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:pr:Rs:S:u:"); |
515864a0 EC |
425 | if (c < 0) { |
426 | break; | |
427 | } | |
428 | switch (c) { | |
429 | case 'd': | |
430 | duration = atoi(optarg); | |
431 | break; | |
432 | case 'D': | |
433 | resize_delay = atol(optarg); | |
434 | break; | |
435 | case 'g': | |
436 | init_range = pow2ceil(atol(optarg)); | |
437 | lookup_range = pow2ceil(atol(optarg)); | |
438 | update_range = pow2ceil(atol(optarg)); | |
439 | qht_n_elems = atol(optarg); | |
440 | init_size = atol(optarg); | |
441 | break; | |
442 | case 'h': | |
443 | usage_complete(argc, argv); | |
444 | exit(0); | |
445 | case 'k': | |
446 | init_size = atol(optarg); | |
447 | break; | |
448 | case 'K': | |
449 | init_range = pow2ceil(atol(optarg)); | |
450 | break; | |
451 | case 'l': | |
452 | lookup_range = pow2ceil(atol(optarg)); | |
453 | break; | |
454 | case 'n': | |
455 | n_rw_threads = atoi(optarg); | |
456 | break; | |
457 | case 'N': | |
458 | n_rz_threads = atoi(optarg); | |
459 | break; | |
460 | case 'o': | |
461 | populate_offset = atol(optarg); | |
462 | break; | |
bd224fce EC |
463 | case 'p': |
464 | precompute_hash = true; | |
465 | hfunc = hval; | |
466 | break; | |
515864a0 EC |
467 | case 'r': |
468 | update_range = pow2ceil(atol(optarg)); | |
469 | break; | |
470 | case 'R': | |
471 | qht_mode |= QHT_MODE_AUTO_RESIZE; | |
472 | break; | |
473 | case 's': | |
474 | qht_n_elems = atol(optarg); | |
475 | break; | |
476 | case 'S': | |
477 | resize_rate = atof(optarg) / 100.0; | |
478 | if (resize_rate > 1.0) { | |
479 | resize_rate = 1.0; | |
480 | } | |
481 | break; | |
482 | case 'u': | |
483 | update_rate = atof(optarg) / 100.0; | |
484 | if (update_rate > 1.0) { | |
485 | update_rate = 1.0; | |
486 | } | |
487 | break; | |
488 | } | |
489 | } | |
490 | } | |
491 | ||
/*
 * Benchmark driver: parse the options, build and populate the table, start
 * the threads, run for the requested duration and print the results.
 */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);

    /* set-up: the table must be populated before any thread is created */
    htable_init();
    create_threads();

    /* measurement and report */
    run_test();
    pr_stats();

    return 0;
}