]>
Commit | Line | Data |
---|---|---|
515864a0 EC |
1 | /* |
2 | * Copyright (C) 2016, Emilio G. Cota <[email protected]> | |
3 | * | |
4 | * License: GNU GPL, version 2 or later. | |
5 | * See the COPYING file in the top-level directory. | |
6 | */ | |
7 | #include "qemu/osdep.h" | |
515864a0 EC |
8 | #include "qemu/processor.h" |
9 | #include "qemu/atomic.h" | |
10 | #include "qemu/qht.h" | |
11 | #include "qemu/rcu.h" | |
12 | #include "exec/tb-hash-xx.h" | |
13 | ||
/*
 * Per-thread operation counters.
 *
 * Counters come in pairs: the plain name counts attempts that succeeded,
 * the "not_" name counts attempts that did not.
 */
struct thread_stats {
    /* lookups */
    size_t rd;
    size_t not_rd;
    /* insertions */
    size_t in;
    size_t not_in;
    /* removals */
    size_t rm;
    size_t not_rm;
    /* resizes */
    size_t rz;
    size_t not_rz;
};
24 | ||
25 | struct thread_info { | |
26 | void (*func)(struct thread_info *); | |
27 | struct thread_stats stats; | |
28 | uint64_t r; | |
29 | bool write_op; /* writes alternate between insertions and removals */ | |
30 | bool resize_down; | |
31 | } QEMU_ALIGNED(64); /* avoid false sharing among threads */ | |
32 | ||
33 | static struct qht ht; | |
34 | static QemuThread *rw_threads; | |
35 | ||
36 | #define DEFAULT_RANGE (4096) | |
37 | #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE | |
38 | ||
39 | static unsigned int duration = 1; | |
40 | static unsigned int n_rw_threads = 1; | |
41 | static unsigned long lookup_range = DEFAULT_RANGE; | |
42 | static unsigned long update_range = DEFAULT_RANGE; | |
43 | static size_t init_range = DEFAULT_RANGE; | |
44 | static size_t init_size = DEFAULT_RANGE; | |
45 | static size_t n_ready_threads; | |
46 | static long populate_offset; | |
47 | static long *keys; | |
48 | ||
49 | static size_t resize_min; | |
50 | static size_t resize_max; | |
51 | static struct thread_info *rz_info; | |
52 | static unsigned long resize_delay = 1000; | |
53 | static double resize_rate; /* 0.0 to 1.0 */ | |
54 | static unsigned int n_rz_threads = 1; | |
55 | static QemuThread *rz_threads; | |
56 | ||
57 | static double update_rate; /* 0.0 to 1.0 */ | |
58 | static uint64_t update_threshold; | |
59 | static uint64_t resize_threshold; | |
60 | ||
61 | static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; | |
62 | static int qht_mode; | |
63 | ||
64 | static bool test_start; | |
65 | static bool test_stop; | |
66 | ||
67 | static struct thread_info *rw_info; | |
68 | ||
69 | static const char commands_string[] = | |
70 | " -d = duration, in seconds\n" | |
71 | " -n = number of threads\n" | |
72 | "\n" | |
73 | " -o = offset at which keys start\n" | |
74 | "\n" | |
75 | " -g = set -s,-k,-K,-l,-r to the same value\n" | |
76 | " -s = initial size hint\n" | |
77 | " -k = initial number of keys\n" | |
78 | " -K = initial range of keys (will be rounded up to pow2)\n" | |
79 | " -l = lookup range of keys (will be rounded up to pow2)\n" | |
80 | " -r = update range of keys (will be rounded up to pow2)\n" | |
81 | "\n" | |
82 | " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n" | |
83 | "\n" | |
84 | " -R = enable auto-resize\n" | |
85 | " -S = resize rate (0.0 to 100.0)\n" | |
86 | " -D = delay (in us) between potential resizes\n" | |
87 | " -N = number of resize threads"; | |
88 | ||
89 | static void usage_complete(int argc, char *argv[]) | |
90 | { | |
91 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); | |
92 | fprintf(stderr, "options:\n%s\n", commands_string); | |
93 | exit(-1); | |
94 | } | |
95 | ||
/*
 * Comparison callback for hash table lookups: both arguments point to a
 * long, and two entries match when the pointed-to values are equal.
 */
static bool is_equal(const void *obj, const void *userp)
{
    long stored = *(const long *)obj;
    long wanted = *(const long *)userp;

    return stored == wanted;
}
103 | ||
/* Hash a key value with tb_hash_func5(), passing 0 for the other two inputs. */
static inline uint32_t h(unsigned long v)
{
    uint32_t hash = tb_hash_func5(v, 0, 0);

    return hash;
}
108 | ||
/*
 * One step of the xorshift64* generator.
 * From: https://en.wikipedia.org/wiki/Xorshift
 *
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= INT_MAX).
 *
 * Takes the current state @x and returns the next value.  A state of 0 maps
 * to 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t s = x;

    s = s ^ (s >> 12); /* a */
    s = s ^ (s << 25); /* b */
    s = s ^ (s >> 27); /* c */
    return s * multiplier;
}
121 | ||
122 | static void do_rz(struct thread_info *info) | |
123 | { | |
124 | struct thread_stats *stats = &info->stats; | |
125 | ||
126 | if (info->r < resize_threshold) { | |
127 | size_t size = info->resize_down ? resize_min : resize_max; | |
128 | bool resized; | |
129 | ||
130 | resized = qht_resize(&ht, size); | |
131 | info->resize_down = !info->resize_down; | |
132 | ||
133 | if (resized) { | |
134 | stats->rz++; | |
135 | } else { | |
136 | stats->not_rz++; | |
137 | } | |
138 | } | |
139 | g_usleep(resize_delay); | |
140 | } | |
141 | ||
142 | static void do_rw(struct thread_info *info) | |
143 | { | |
144 | struct thread_stats *stats = &info->stats; | |
145 | uint32_t hash; | |
146 | long *p; | |
147 | ||
148 | if (info->r >= update_threshold) { | |
149 | bool read; | |
150 | ||
151 | p = &keys[info->r & (lookup_range - 1)]; | |
152 | hash = h(*p); | |
153 | read = qht_lookup(&ht, is_equal, p, hash); | |
154 | if (read) { | |
155 | stats->rd++; | |
156 | } else { | |
157 | stats->not_rd++; | |
158 | } | |
159 | } else { | |
160 | p = &keys[info->r & (update_range - 1)]; | |
161 | hash = h(*p); | |
162 | if (info->write_op) { | |
163 | bool written = false; | |
164 | ||
165 | if (qht_lookup(&ht, is_equal, p, hash) == NULL) { | |
166 | written = qht_insert(&ht, p, hash); | |
167 | } | |
168 | if (written) { | |
169 | stats->in++; | |
170 | } else { | |
171 | stats->not_in++; | |
172 | } | |
173 | } else { | |
174 | bool removed = false; | |
175 | ||
176 | if (qht_lookup(&ht, is_equal, p, hash)) { | |
177 | removed = qht_remove(&ht, p, hash); | |
178 | } | |
179 | if (removed) { | |
180 | stats->rm++; | |
181 | } else { | |
182 | stats->not_rm++; | |
183 | } | |
184 | } | |
185 | info->write_op = !info->write_op; | |
186 | } | |
187 | } | |
188 | ||
189 | static void *thread_func(void *p) | |
190 | { | |
191 | struct thread_info *info = p; | |
192 | ||
193 | rcu_register_thread(); | |
194 | ||
195 | atomic_inc(&n_ready_threads); | |
977ec47d | 196 | while (!atomic_read(&test_start)) { |
515864a0 EC |
197 | cpu_relax(); |
198 | } | |
199 | ||
200 | rcu_read_lock(); | |
201 | while (!atomic_read(&test_stop)) { | |
202 | info->r = xorshift64star(info->r); | |
203 | info->func(info); | |
204 | } | |
205 | rcu_read_unlock(); | |
206 | ||
207 | rcu_unregister_thread(); | |
208 | return NULL; | |
209 | } | |
210 | ||
211 | /* sets everything except info->func */ | |
212 | static void prepare_thread_info(struct thread_info *info, int i) | |
213 | { | |
214 | /* seed for the RNG; each thread should have a different one */ | |
215 | info->r = (i + 1) ^ time(NULL); | |
216 | /* the first update will be a write */ | |
217 | info->write_op = true; | |
218 | /* the first resize will be down */ | |
219 | info->resize_down = true; | |
220 | ||
221 | memset(&info->stats, 0, sizeof(info->stats)); | |
222 | } | |
223 | ||
224 | static void | |
225 | th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, | |
226 | void (*func)(struct thread_info *), int offset, int n) | |
227 | { | |
228 | struct thread_info *info; | |
229 | QemuThread *th; | |
230 | int i; | |
231 | ||
232 | th = g_malloc(sizeof(*th) * n); | |
233 | *threads = th; | |
234 | ||
235 | info = qemu_memalign(64, sizeof(*info) * n); | |
236 | *infos = info; | |
237 | ||
238 | for (i = 0; i < n; i++) { | |
239 | prepare_thread_info(&info[i], offset + i); | |
240 | info[i].func = func; | |
241 | qemu_thread_create(&th[i], name, thread_func, &info[i], | |
242 | QEMU_THREAD_JOINABLE); | |
243 | } | |
244 | } | |
245 | ||
246 | static void create_threads(void) | |
247 | { | |
248 | th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); | |
249 | th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); | |
250 | } | |
251 | ||
252 | static void pr_params(void) | |
253 | { | |
254 | printf("Parameters:\n"); | |
255 | printf(" duration: %d s\n", duration); | |
256 | printf(" # of threads: %u\n", n_rw_threads); | |
257 | printf(" initial # of keys: %zu\n", init_size); | |
258 | printf(" initial size hint: %zu\n", qht_n_elems); | |
259 | printf(" auto-resize: %s\n", | |
260 | qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); | |
261 | if (resize_rate) { | |
262 | printf(" resize_rate: %f%%\n", resize_rate * 100.0); | |
263 | printf(" resize range: %zu-%zu\n", resize_min, resize_max); | |
264 | printf(" # resize threads %u\n", n_rz_threads); | |
265 | } | |
266 | printf(" update rate: %f%%\n", update_rate * 100.0); | |
267 | printf(" offset: %ld\n", populate_offset); | |
268 | printf(" initial key range: %zu\n", init_range); | |
269 | printf(" lookup range: %lu\n", lookup_range); | |
270 | printf(" update range: %lu\n", update_range); | |
271 | } | |
272 | ||
/*
 * Scale @rate, a fraction in [0.0, 1.0], to the full range of a uint64_t
 * and store the result in *@threshold.
 *
 * Out-of-range input is clamped rather than converted, because converting
 * a negative, NaN or too-large double to uint64_t is undefined behavior:
 * rates that are not greater than 0 (including NaN) yield 0, and rates of
 * 1.0 or more yield UINT64_MAX.
 */
static void do_threshold(double rate, uint64_t *threshold)
{
    if (!(rate > 0.0)) {
        /* zero, negative or NaN */
        *threshold = 0;
    } else if (rate >= 1.0) {
        *threshold = UINT64_MAX;
    } else {
        /*
         * UINT64_MAX is not representable as a double and rounds to 2^64 when
         * converted; spell out that scale factor.  For 0 < rate < 1 the
         * product is below 2^64, so the conversion is well defined.
         */
        *threshold = rate * 0x1p64;
    }
}
281 | ||
282 | static void htable_init(void) | |
283 | { | |
284 | unsigned long n = MAX(init_range, update_range); | |
285 | uint64_t r = time(NULL); | |
286 | size_t retries = 0; | |
287 | size_t i; | |
288 | ||
289 | /* avoid allocating memory later by allocating all the keys now */ | |
290 | keys = g_malloc(sizeof(*keys) * n); | |
291 | for (i = 0; i < n; i++) { | |
292 | keys[i] = populate_offset + i; | |
293 | } | |
294 | ||
295 | /* some sanity checks */ | |
296 | g_assert_cmpuint(lookup_range, <=, n); | |
297 | ||
298 | /* compute thresholds */ | |
299 | do_threshold(update_rate, &update_threshold); | |
300 | do_threshold(resize_rate, &resize_threshold); | |
301 | ||
302 | if (resize_rate) { | |
303 | resize_min = n / 2; | |
304 | resize_max = n; | |
305 | assert(resize_min < resize_max); | |
306 | } else { | |
307 | n_rz_threads = 0; | |
308 | } | |
309 | ||
310 | /* initialize the hash table */ | |
311 | qht_init(&ht, qht_n_elems, qht_mode); | |
312 | assert(init_size <= init_range); | |
313 | ||
314 | pr_params(); | |
315 | ||
316 | fprintf(stderr, "Initialization: populating %zu items...", init_size); | |
317 | for (i = 0; i < init_size; i++) { | |
318 | for (;;) { | |
319 | uint32_t hash; | |
320 | long *p; | |
321 | ||
322 | r = xorshift64star(r); | |
323 | p = &keys[r & (init_range - 1)]; | |
324 | hash = h(*p); | |
325 | if (qht_insert(&ht, p, hash)) { | |
326 | break; | |
327 | } | |
328 | retries++; | |
329 | } | |
330 | } | |
331 | fprintf(stderr, " populated after %zu retries\n", retries); | |
332 | } | |
333 | ||
334 | static void add_stats(struct thread_stats *s, struct thread_info *info, int n) | |
335 | { | |
336 | int i; | |
337 | ||
338 | for (i = 0; i < n; i++) { | |
339 | struct thread_stats *stats = &info[i].stats; | |
340 | ||
341 | s->rd += stats->rd; | |
342 | s->not_rd += stats->not_rd; | |
343 | ||
344 | s->in += stats->in; | |
345 | s->not_in += stats->not_in; | |
346 | ||
347 | s->rm += stats->rm; | |
348 | s->not_rm += stats->not_rm; | |
349 | ||
350 | s->rz += stats->rz; | |
351 | s->not_rz += stats->not_rz; | |
352 | } | |
353 | } | |
354 | ||
355 | static void pr_stats(void) | |
356 | { | |
357 | struct thread_stats s = {}; | |
358 | double tx; | |
359 | ||
360 | add_stats(&s, rw_info, n_rw_threads); | |
361 | add_stats(&s, rz_info, n_rz_threads); | |
362 | ||
363 | printf("Results:\n"); | |
364 | ||
365 | if (resize_rate) { | |
366 | printf(" Resizes: %zu (%.2f%% of %zu)\n", | |
367 | s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); | |
368 | } | |
369 | ||
370 | printf(" Read: %.2f M (%.2f%% of %.2fM)\n", | |
371 | (double)s.rd / 1e6, | |
372 | (double)s.rd / (s.rd + s.not_rd) * 100, | |
373 | (double)(s.rd + s.not_rd) / 1e6); | |
374 | printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", | |
375 | (double)s.in / 1e6, | |
376 | (double)s.in / (s.in + s.not_in) * 100, | |
377 | (double)(s.in + s.not_in) / 1e6); | |
378 | printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", | |
379 | (double)s.rm / 1e6, | |
380 | (double)s.rm / (s.rm + s.not_rm) * 100, | |
381 | (double)(s.rm + s.not_rm) / 1e6); | |
382 | ||
383 | tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; | |
384 | printf(" Throughput: %.2f MT/s\n", tx); | |
385 | printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); | |
386 | } | |
387 | ||
388 | static void run_test(void) | |
389 | { | |
390 | unsigned int remaining; | |
391 | int i; | |
392 | ||
393 | while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { | |
394 | cpu_relax(); | |
395 | } | |
977ec47d | 396 | atomic_set(&test_start, true); |
515864a0 EC |
397 | do { |
398 | remaining = sleep(duration); | |
399 | } while (remaining); | |
977ec47d | 400 | atomic_set(&test_stop, true); |
515864a0 EC |
401 | |
402 | for (i = 0; i < n_rw_threads; i++) { | |
403 | qemu_thread_join(&rw_threads[i]); | |
404 | } | |
405 | for (i = 0; i < n_rz_threads; i++) { | |
406 | qemu_thread_join(&rz_threads[i]); | |
407 | } | |
408 | } | |
409 | ||
410 | static void parse_args(int argc, char *argv[]) | |
411 | { | |
412 | int c; | |
413 | ||
414 | for (;;) { | |
415 | c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:r:Rs:S:u:"); | |
416 | if (c < 0) { | |
417 | break; | |
418 | } | |
419 | switch (c) { | |
420 | case 'd': | |
421 | duration = atoi(optarg); | |
422 | break; | |
423 | case 'D': | |
424 | resize_delay = atol(optarg); | |
425 | break; | |
426 | case 'g': | |
427 | init_range = pow2ceil(atol(optarg)); | |
428 | lookup_range = pow2ceil(atol(optarg)); | |
429 | update_range = pow2ceil(atol(optarg)); | |
430 | qht_n_elems = atol(optarg); | |
431 | init_size = atol(optarg); | |
432 | break; | |
433 | case 'h': | |
434 | usage_complete(argc, argv); | |
435 | exit(0); | |
436 | case 'k': | |
437 | init_size = atol(optarg); | |
438 | break; | |
439 | case 'K': | |
440 | init_range = pow2ceil(atol(optarg)); | |
441 | break; | |
442 | case 'l': | |
443 | lookup_range = pow2ceil(atol(optarg)); | |
444 | break; | |
445 | case 'n': | |
446 | n_rw_threads = atoi(optarg); | |
447 | break; | |
448 | case 'N': | |
449 | n_rz_threads = atoi(optarg); | |
450 | break; | |
451 | case 'o': | |
452 | populate_offset = atol(optarg); | |
453 | break; | |
454 | case 'r': | |
455 | update_range = pow2ceil(atol(optarg)); | |
456 | break; | |
457 | case 'R': | |
458 | qht_mode |= QHT_MODE_AUTO_RESIZE; | |
459 | break; | |
460 | case 's': | |
461 | qht_n_elems = atol(optarg); | |
462 | break; | |
463 | case 'S': | |
464 | resize_rate = atof(optarg) / 100.0; | |
465 | if (resize_rate > 1.0) { | |
466 | resize_rate = 1.0; | |
467 | } | |
468 | break; | |
469 | case 'u': | |
470 | update_rate = atof(optarg) / 100.0; | |
471 | if (update_rate > 1.0) { | |
472 | update_rate = 1.0; | |
473 | } | |
474 | break; | |
475 | } | |
476 | } | |
477 | } | |
478 | ||
/*
 * Benchmark driver: read the options, build and populate the hash table,
 * start the worker threads, run the timed test and print the results.
 */
int main(int argc, char *argv[])
{
    /* configuration and set-up */
    parse_args(argc, argv);
    htable_init();

    /* timed run */
    create_threads();
    run_test();

    /* report */
    pr_stats();

    return 0;
}