]>
Commit | Line | Data |
---|---|---|
515864a0 EC |
1 | /* |
2 | * Copyright (C) 2016, Emilio G. Cota <[email protected]> | |
3 | * | |
4 | * License: GNU GPL, version 2 or later. | |
5 | * See the COPYING file in the top-level directory. | |
6 | */ | |
7 | #include "qemu/osdep.h" | |
515864a0 EC |
8 | #include "qemu/processor.h" |
9 | #include "qemu/atomic.h" | |
10 | #include "qemu/qht.h" | |
11 | #include "qemu/rcu.h" | |
12 | #include "exec/tb-hash-xx.h" | |
13 | ||
/*
 * Per-thread operation counters.  Every operation type has a counter for
 * attempts that succeeded and a matching not_* counter for those that did not.
 */
struct thread_stats {
    size_t rd, not_rd; /* lookups that found / did not find the key */
    size_t in, not_in; /* insertion attempts that did / did not insert */
    size_t rm, not_rm; /* removal attempts that did / did not remove */
    size_t rz, not_rz; /* qht_resize() calls that did / did not resize */
};
24 | ||
25 | struct thread_info { | |
26 | void (*func)(struct thread_info *); | |
27 | struct thread_stats stats; | |
28 | uint64_t r; | |
29 | bool write_op; /* writes alternate between insertions and removals */ | |
30 | bool resize_down; | |
31 | } QEMU_ALIGNED(64); /* avoid false sharing among threads */ | |
32 | ||
33 | static struct qht ht; | |
34 | static QemuThread *rw_threads; | |
35 | ||
36 | #define DEFAULT_RANGE (4096) | |
37 | #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE | |
38 | ||
39 | static unsigned int duration = 1; | |
40 | static unsigned int n_rw_threads = 1; | |
41 | static unsigned long lookup_range = DEFAULT_RANGE; | |
42 | static unsigned long update_range = DEFAULT_RANGE; | |
43 | static size_t init_range = DEFAULT_RANGE; | |
44 | static size_t init_size = DEFAULT_RANGE; | |
45 | static size_t n_ready_threads; | |
46 | static long populate_offset; | |
47 | static long *keys; | |
48 | ||
49 | static size_t resize_min; | |
50 | static size_t resize_max; | |
51 | static struct thread_info *rz_info; | |
52 | static unsigned long resize_delay = 1000; | |
53 | static double resize_rate; /* 0.0 to 1.0 */ | |
54 | static unsigned int n_rz_threads = 1; | |
55 | static QemuThread *rz_threads; | |
bd224fce | 56 | static bool precompute_hash; |
515864a0 EC |
57 | |
58 | static double update_rate; /* 0.0 to 1.0 */ | |
59 | static uint64_t update_threshold; | |
60 | static uint64_t resize_threshold; | |
61 | ||
62 | static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; | |
63 | static int qht_mode; | |
64 | ||
65 | static bool test_start; | |
66 | static bool test_stop; | |
67 | ||
68 | static struct thread_info *rw_info; | |
69 | ||
/*
 * Help text printed by usage_complete(): one entry per option accepted by
 * parse_args().
 */
static const char commands_string[] =
    " -d = duration, in seconds\n"
    " -n = number of threads\n"
    " -h = print this help and exit\n"
    "\n"
    " -o = offset at which keys start\n"
    " -p = precompute hashes\n"
    "\n"
    " -g = set -s,-k,-K,-l,-r to the same value\n"
    " -s = initial size hint\n"
    " -k = initial number of keys\n"
    " -K = initial range of keys (will be rounded up to pow2)\n"
    " -l = lookup range of keys (will be rounded up to pow2)\n"
    " -r = update range of keys (will be rounded up to pow2)\n"
    "\n"
    " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n"
    "\n"
    " -R = enable auto-resize\n"
    " -S = resize rate (0.0 to 100.0)\n"
    " -D = delay (in us) between potential resizes\n"
    " -N = number of resize threads";
89 | ||
90 | static void usage_complete(int argc, char *argv[]) | |
91 | { | |
92 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); | |
93 | fprintf(stderr, "options:\n%s\n", commands_string); | |
94 | exit(-1); | |
95 | } | |
96 | ||
/*
 * Key comparison callback handed to qht_init(): both arguments point to
 * 'long' keys, which compare equal iff their values match.
 */
static bool is_equal(const void *ap, const void *bp)
{
    const long lhs = *(const long *)ap;
    const long rhs = *(const long *)bp;

    return lhs == rhs;
}
104 | ||
/* Hash a key value with tb_hash_func7(), passing 0 for its other inputs. */
static uint32_t h(unsigned long v)
{
    const uint32_t hash = tb_hash_func7(v, 0, 0, 0, 0);

    return hash;
}
109 | ||
bd224fce EC |
/* Identity "hash": the key value itself, converted to 32 bits. */
static uint32_t hval(unsigned long v)
{
    return (uint32_t)v;
}
114 | ||
115 | static uint32_t (*hfunc)(unsigned long v) = h; | |
116 | ||
515864a0 EC |
/*
 * xorshift64* step, from https://en.wikipedia.org/wiki/Xorshift
 *
 * Preferred over rand_r() because it is faster and yields a wider range
 * (RAND_MAX is only guaranteed to be >= INT_MAX).
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return state * multiplier;
}
129 | ||
130 | static void do_rz(struct thread_info *info) | |
131 | { | |
132 | struct thread_stats *stats = &info->stats; | |
133 | ||
134 | if (info->r < resize_threshold) { | |
135 | size_t size = info->resize_down ? resize_min : resize_max; | |
136 | bool resized; | |
137 | ||
138 | resized = qht_resize(&ht, size); | |
139 | info->resize_down = !info->resize_down; | |
140 | ||
141 | if (resized) { | |
142 | stats->rz++; | |
143 | } else { | |
144 | stats->not_rz++; | |
145 | } | |
146 | } | |
147 | g_usleep(resize_delay); | |
148 | } | |
149 | ||
150 | static void do_rw(struct thread_info *info) | |
151 | { | |
152 | struct thread_stats *stats = &info->stats; | |
153 | uint32_t hash; | |
154 | long *p; | |
155 | ||
156 | if (info->r >= update_threshold) { | |
157 | bool read; | |
158 | ||
159 | p = &keys[info->r & (lookup_range - 1)]; | |
bd224fce | 160 | hash = hfunc(*p); |
61b8cef1 | 161 | read = qht_lookup(&ht, p, hash); |
515864a0 EC |
162 | if (read) { |
163 | stats->rd++; | |
164 | } else { | |
165 | stats->not_rd++; | |
166 | } | |
167 | } else { | |
168 | p = &keys[info->r & (update_range - 1)]; | |
bd224fce | 169 | hash = hfunc(*p); |
515864a0 EC |
170 | if (info->write_op) { |
171 | bool written = false; | |
172 | ||
61b8cef1 | 173 | if (qht_lookup(&ht, p, hash) == NULL) { |
32359d52 | 174 | written = qht_insert(&ht, p, hash, NULL); |
515864a0 EC |
175 | } |
176 | if (written) { | |
177 | stats->in++; | |
178 | } else { | |
179 | stats->not_in++; | |
180 | } | |
181 | } else { | |
182 | bool removed = false; | |
183 | ||
61b8cef1 | 184 | if (qht_lookup(&ht, p, hash)) { |
515864a0 EC |
185 | removed = qht_remove(&ht, p, hash); |
186 | } | |
187 | if (removed) { | |
188 | stats->rm++; | |
189 | } else { | |
190 | stats->not_rm++; | |
191 | } | |
192 | } | |
193 | info->write_op = !info->write_op; | |
194 | } | |
195 | } | |
196 | ||
197 | static void *thread_func(void *p) | |
198 | { | |
199 | struct thread_info *info = p; | |
200 | ||
201 | rcu_register_thread(); | |
202 | ||
203 | atomic_inc(&n_ready_threads); | |
977ec47d | 204 | while (!atomic_read(&test_start)) { |
515864a0 EC |
205 | cpu_relax(); |
206 | } | |
207 | ||
208 | rcu_read_lock(); | |
209 | while (!atomic_read(&test_stop)) { | |
210 | info->r = xorshift64star(info->r); | |
211 | info->func(info); | |
212 | } | |
213 | rcu_read_unlock(); | |
214 | ||
215 | rcu_unregister_thread(); | |
216 | return NULL; | |
217 | } | |
218 | ||
219 | /* sets everything except info->func */ | |
220 | static void prepare_thread_info(struct thread_info *info, int i) | |
221 | { | |
222 | /* seed for the RNG; each thread should have a different one */ | |
223 | info->r = (i + 1) ^ time(NULL); | |
224 | /* the first update will be a write */ | |
225 | info->write_op = true; | |
226 | /* the first resize will be down */ | |
227 | info->resize_down = true; | |
228 | ||
229 | memset(&info->stats, 0, sizeof(info->stats)); | |
230 | } | |
231 | ||
232 | static void | |
233 | th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, | |
234 | void (*func)(struct thread_info *), int offset, int n) | |
235 | { | |
236 | struct thread_info *info; | |
237 | QemuThread *th; | |
238 | int i; | |
239 | ||
240 | th = g_malloc(sizeof(*th) * n); | |
241 | *threads = th; | |
242 | ||
243 | info = qemu_memalign(64, sizeof(*info) * n); | |
244 | *infos = info; | |
245 | ||
246 | for (i = 0; i < n; i++) { | |
247 | prepare_thread_info(&info[i], offset + i); | |
248 | info[i].func = func; | |
249 | qemu_thread_create(&th[i], name, thread_func, &info[i], | |
250 | QEMU_THREAD_JOINABLE); | |
251 | } | |
252 | } | |
253 | ||
254 | static void create_threads(void) | |
255 | { | |
256 | th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); | |
257 | th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); | |
258 | } | |
259 | ||
260 | static void pr_params(void) | |
261 | { | |
262 | printf("Parameters:\n"); | |
263 | printf(" duration: %d s\n", duration); | |
264 | printf(" # of threads: %u\n", n_rw_threads); | |
265 | printf(" initial # of keys: %zu\n", init_size); | |
266 | printf(" initial size hint: %zu\n", qht_n_elems); | |
267 | printf(" auto-resize: %s\n", | |
268 | qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); | |
269 | if (resize_rate) { | |
270 | printf(" resize_rate: %f%%\n", resize_rate * 100.0); | |
271 | printf(" resize range: %zu-%zu\n", resize_min, resize_max); | |
272 | printf(" # resize threads %u\n", n_rz_threads); | |
273 | } | |
274 | printf(" update rate: %f%%\n", update_rate * 100.0); | |
275 | printf(" offset: %ld\n", populate_offset); | |
276 | printf(" initial key range: %zu\n", init_range); | |
277 | printf(" lookup range: %lu\n", lookup_range); | |
278 | printf(" update range: %lu\n", update_range); | |
279 | } | |
280 | ||
/*
 * Convert @rate, a probability in [0.0, 1.0], into a 64-bit threshold stored
 * in *@threshold.  The worker functions compare their 64-bit random value
 * against the result.
 *
 * Out-of-range input is clamped: converting a negative, NaN or too-large
 * double to uint64_t is undefined behavior, so such rates map to 0 (for
 * negative/NaN) or UINT64_MAX (for anything >= 1.0).
 *
 * In-range values are scaled by 2^64.  UINT64_MAX itself is not
 * representable as a double (it would be rounded to 2^64 anyway), so the
 * exact constant is spelled out; for rate < 1.0 the product is always
 * below 2^64 and therefore fits.
 */
static void do_threshold(double rate, uint64_t *threshold)
{
    if (!(rate > 0.0)) {
        /* zero, negative or NaN */
        *threshold = 0;
    } else if (rate >= 1.0) {
        *threshold = UINT64_MAX;
    } else {
        *threshold = rate * 0x1p64;
    }
}
289 | ||
290 | static void htable_init(void) | |
291 | { | |
292 | unsigned long n = MAX(init_range, update_range); | |
293 | uint64_t r = time(NULL); | |
294 | size_t retries = 0; | |
295 | size_t i; | |
296 | ||
297 | /* avoid allocating memory later by allocating all the keys now */ | |
298 | keys = g_malloc(sizeof(*keys) * n); | |
299 | for (i = 0; i < n; i++) { | |
bd224fce EC |
300 | long val = populate_offset + i; |
301 | ||
302 | keys[i] = precompute_hash ? h(val) : hval(val); | |
515864a0 EC |
303 | } |
304 | ||
305 | /* some sanity checks */ | |
306 | g_assert_cmpuint(lookup_range, <=, n); | |
307 | ||
308 | /* compute thresholds */ | |
309 | do_threshold(update_rate, &update_threshold); | |
310 | do_threshold(resize_rate, &resize_threshold); | |
311 | ||
312 | if (resize_rate) { | |
313 | resize_min = n / 2; | |
314 | resize_max = n; | |
315 | assert(resize_min < resize_max); | |
316 | } else { | |
317 | n_rz_threads = 0; | |
318 | } | |
319 | ||
320 | /* initialize the hash table */ | |
61b8cef1 | 321 | qht_init(&ht, is_equal, qht_n_elems, qht_mode); |
515864a0 EC |
322 | assert(init_size <= init_range); |
323 | ||
324 | pr_params(); | |
325 | ||
326 | fprintf(stderr, "Initialization: populating %zu items...", init_size); | |
327 | for (i = 0; i < init_size; i++) { | |
328 | for (;;) { | |
329 | uint32_t hash; | |
330 | long *p; | |
331 | ||
332 | r = xorshift64star(r); | |
333 | p = &keys[r & (init_range - 1)]; | |
bd224fce | 334 | hash = hfunc(*p); |
32359d52 | 335 | if (qht_insert(&ht, p, hash, NULL)) { |
515864a0 EC |
336 | break; |
337 | } | |
338 | retries++; | |
339 | } | |
340 | } | |
341 | fprintf(stderr, " populated after %zu retries\n", retries); | |
342 | } | |
343 | ||
344 | static void add_stats(struct thread_stats *s, struct thread_info *info, int n) | |
345 | { | |
346 | int i; | |
347 | ||
348 | for (i = 0; i < n; i++) { | |
349 | struct thread_stats *stats = &info[i].stats; | |
350 | ||
351 | s->rd += stats->rd; | |
352 | s->not_rd += stats->not_rd; | |
353 | ||
354 | s->in += stats->in; | |
355 | s->not_in += stats->not_in; | |
356 | ||
357 | s->rm += stats->rm; | |
358 | s->not_rm += stats->not_rm; | |
359 | ||
360 | s->rz += stats->rz; | |
361 | s->not_rz += stats->not_rz; | |
362 | } | |
363 | } | |
364 | ||
365 | static void pr_stats(void) | |
366 | { | |
367 | struct thread_stats s = {}; | |
368 | double tx; | |
369 | ||
370 | add_stats(&s, rw_info, n_rw_threads); | |
371 | add_stats(&s, rz_info, n_rz_threads); | |
372 | ||
373 | printf("Results:\n"); | |
374 | ||
375 | if (resize_rate) { | |
376 | printf(" Resizes: %zu (%.2f%% of %zu)\n", | |
377 | s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); | |
378 | } | |
379 | ||
380 | printf(" Read: %.2f M (%.2f%% of %.2fM)\n", | |
381 | (double)s.rd / 1e6, | |
382 | (double)s.rd / (s.rd + s.not_rd) * 100, | |
383 | (double)(s.rd + s.not_rd) / 1e6); | |
384 | printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", | |
385 | (double)s.in / 1e6, | |
386 | (double)s.in / (s.in + s.not_in) * 100, | |
387 | (double)(s.in + s.not_in) / 1e6); | |
388 | printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", | |
389 | (double)s.rm / 1e6, | |
390 | (double)s.rm / (s.rm + s.not_rm) * 100, | |
391 | (double)(s.rm + s.not_rm) / 1e6); | |
392 | ||
393 | tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; | |
394 | printf(" Throughput: %.2f MT/s\n", tx); | |
395 | printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); | |
396 | } | |
397 | ||
398 | static void run_test(void) | |
399 | { | |
400 | unsigned int remaining; | |
401 | int i; | |
402 | ||
403 | while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { | |
404 | cpu_relax(); | |
405 | } | |
977ec47d | 406 | atomic_set(&test_start, true); |
515864a0 EC |
407 | do { |
408 | remaining = sleep(duration); | |
409 | } while (remaining); | |
977ec47d | 410 | atomic_set(&test_stop, true); |
515864a0 EC |
411 | |
412 | for (i = 0; i < n_rw_threads; i++) { | |
413 | qemu_thread_join(&rw_threads[i]); | |
414 | } | |
415 | for (i = 0; i < n_rz_threads; i++) { | |
416 | qemu_thread_join(&rz_threads[i]); | |
417 | } | |
418 | } | |
419 | ||
420 | static void parse_args(int argc, char *argv[]) | |
421 | { | |
422 | int c; | |
423 | ||
424 | for (;;) { | |
bd224fce | 425 | c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:pr:Rs:S:u:"); |
515864a0 EC |
426 | if (c < 0) { |
427 | break; | |
428 | } | |
429 | switch (c) { | |
430 | case 'd': | |
431 | duration = atoi(optarg); | |
432 | break; | |
433 | case 'D': | |
434 | resize_delay = atol(optarg); | |
435 | break; | |
436 | case 'g': | |
437 | init_range = pow2ceil(atol(optarg)); | |
438 | lookup_range = pow2ceil(atol(optarg)); | |
439 | update_range = pow2ceil(atol(optarg)); | |
440 | qht_n_elems = atol(optarg); | |
441 | init_size = atol(optarg); | |
442 | break; | |
443 | case 'h': | |
444 | usage_complete(argc, argv); | |
445 | exit(0); | |
446 | case 'k': | |
447 | init_size = atol(optarg); | |
448 | break; | |
449 | case 'K': | |
450 | init_range = pow2ceil(atol(optarg)); | |
451 | break; | |
452 | case 'l': | |
453 | lookup_range = pow2ceil(atol(optarg)); | |
454 | break; | |
455 | case 'n': | |
456 | n_rw_threads = atoi(optarg); | |
457 | break; | |
458 | case 'N': | |
459 | n_rz_threads = atoi(optarg); | |
460 | break; | |
461 | case 'o': | |
462 | populate_offset = atol(optarg); | |
463 | break; | |
bd224fce EC |
464 | case 'p': |
465 | precompute_hash = true; | |
466 | hfunc = hval; | |
467 | break; | |
515864a0 EC |
468 | case 'r': |
469 | update_range = pow2ceil(atol(optarg)); | |
470 | break; | |
471 | case 'R': | |
472 | qht_mode |= QHT_MODE_AUTO_RESIZE; | |
473 | break; | |
474 | case 's': | |
475 | qht_n_elems = atol(optarg); | |
476 | break; | |
477 | case 'S': | |
478 | resize_rate = atof(optarg) / 100.0; | |
479 | if (resize_rate > 1.0) { | |
480 | resize_rate = 1.0; | |
481 | } | |
482 | break; | |
483 | case 'u': | |
484 | update_rate = atof(optarg) / 100.0; | |
485 | if (update_rate > 1.0) { | |
486 | update_rate = 1.0; | |
487 | } | |
488 | break; | |
489 | } | |
490 | } | |
491 | } | |
492 | ||
/*
 * Benchmark driver: parse the options, build and populate the hash table,
 * start the threads, run the timed test and print the results.
 */
int main(int argc, char *argv[])
{
    /* setup */
    parse_args(argc, argv);
    htable_init();
    create_threads();

    /* measurement and report */
    run_test();
    pr_stats();

    return 0;
}