2 * fp-bench.c - A collection of simple floating point microbenchmarks.
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #error Must define HW_POISON_H to work around TARGET_* poisoning
13 #include "qemu/osdep.h"
16 #include "qemu/timer.h"
17 #include "fpu/softfloat.h"
19 /* amortize the computation of random inputs */
20 #define OPS_PER_ITER 50000
22 #define MAX_OPERANDS 3
24 #define SEED_A 0xdeadfacedeadface
25 #define SEED_B 0xbadc0feebadc0fee
26 #define SEED_C 0xbeefdeadbeefdead
39 static const char * const op_names[] = {
67 static const char * const round_names[] = {
68 [ROUND_EVEN] = "even",
69 [ROUND_ZERO] = "zero",
70 [ROUND_DOWN] = "down",
72 [ROUND_TIEAWAY] = "tieaway",
81 static const char * const tester_names[] = {
82 [TESTER_SOFT] = "soft",
83 [TESTER_HOST] = "host",
84 [TESTER_MAX_NR] = NULL,
97 typedef float (*float_func_t)(const struct op_state *s);
98 typedef double (*double_func_t)(const struct op_state *s);
101 float_func_t float_func;
102 double_func_t double_func;
105 typedef void (*bench_func_t)(void);
108 const char * const name;
111 #define DEFAULT_DURATION_SECS 1
113 static uint64_t random_ops[MAX_OPERANDS] = {
114 SEED_A, SEED_B, SEED_C,
116 static float_status soft_status;
117 static enum precision precision;
118 static enum op operation;
119 static enum tester tester;
120 static uint64_t n_completed_ops;
121 static unsigned int duration = DEFAULT_DURATION_SECS;
122 static int64_t ns_elapsed;
123 /* disable optimizations with volatile */
124 static volatile union fp res;
/*
 * One step of the xorshift64* generator.
 *
 * From: https://en.wikipedia.org/wiki/Xorshift
 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 * guaranteed to be >= INT_MAX).
 *
 * @x: current generator state.
 *
 * Returns the shifted/xored state multiplied by the xorshift64* constant
 * (modulo 2^64). An input of 0 yields 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    uint64_t state = x;

    state ^= state >> 12; /* a */
    state ^= state << 25; /* b */
    state ^= state >> 27; /* c */

    /* 0x2545F4914F6CDD1D == 2685821657736338717 */
    return state * UINT64_C(0x2545F4914F6CDD1D);
}
139 static void update_random_ops(int n_ops, enum precision prec)
143 for (i = 0; i < n_ops; i++) {
144 uint64_t r = random_ops[i];
150 r = xorshift64star(r);
151 } while (!float32_is_normal(r));
156 r = xorshift64star(r);
157 } while (!float64_is_normal(r));
160 g_assert_not_reached();
166 static void fill_random(union fp *ops, int n_ops, enum precision prec,
171 for (i = 0; i < n_ops; i++) {
175 ops[i].f32 = make_float32(random_ops[i]);
176 if (no_neg && float32_is_neg(ops[i].f32)) {
177 ops[i].f32 = float32_chs(ops[i].f32);
182 ops[i].f64 = make_float64(random_ops[i]);
183 if (no_neg && float64_is_neg(ops[i].f64)) {
184 ops[i].f64 = float64_chs(ops[i].f64);
188 g_assert_not_reached();
194 * The main benchmark function. Instead of (ab)using macros, we rely
195 * on the compiler to unfold this at compile-time.
197 static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
199 int64_t tf = get_clock() + duration * 1000000000LL;
201 while (get_clock() < tf) {
202 union fp ops[MAX_OPERANDS];
206 update_random_ops(n_ops, prec);
209 fill_random(ops, n_ops, prec, no_neg);
211 for (i = 0; i < OPS_PER_ITER; i++) {
230 res.f = fmaf(a, b, c);
236 res.u64 = isgreater(a, b);
239 g_assert_not_reached();
244 fill_random(ops, n_ops, prec, no_neg);
246 for (i = 0; i < OPS_PER_ITER; i++) {
265 res.d = fma(a, b, c);
271 res.u64 = isgreater(a, b);
274 g_assert_not_reached();
279 fill_random(ops, n_ops, prec, no_neg);
281 for (i = 0; i < OPS_PER_ITER; i++) {
282 float32 a = ops[0].f32;
283 float32 b = ops[1].f32;
284 float32 c = ops[2].f32;
288 res.f32 = float32_add(a, b, &soft_status);
291 res.f32 = float32_sub(a, b, &soft_status);
294 res.f = float32_mul(a, b, &soft_status);
297 res.f32 = float32_div(a, b, &soft_status);
300 res.f32 = float32_muladd(a, b, c, 0, &soft_status);
303 res.f32 = float32_sqrt(a, &soft_status);
306 res.u64 = float32_compare_quiet(a, b, &soft_status);
309 g_assert_not_reached();
314 fill_random(ops, n_ops, prec, no_neg);
316 for (i = 0; i < OPS_PER_ITER; i++) {
317 float64 a = ops[0].f64;
318 float64 b = ops[1].f64;
319 float64 c = ops[2].f64;
323 res.f64 = float64_add(a, b, &soft_status);
326 res.f64 = float64_sub(a, b, &soft_status);
329 res.f = float64_mul(a, b, &soft_status);
332 res.f64 = float64_div(a, b, &soft_status);
335 res.f64 = float64_muladd(a, b, c, 0, &soft_status);
338 res.f64 = float64_sqrt(a, &soft_status);
341 res.u64 = float64_compare_quiet(a, b, &soft_status);
344 g_assert_not_reached();
349 g_assert_not_reached();
351 ns_elapsed += get_clock() - t0;
352 n_completed_ops += OPS_PER_ITER;
356 #define GEN_BENCH(name, type, prec, op, n_ops) \
357 static void __attribute__((flatten)) name(void) \
359 bench(prec, op, n_ops, false); \
362 #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \
363 static void __attribute__((flatten)) name(void) \
365 bench(prec, op, n_ops, true); \
368 #define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \
369 GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
370 GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
371 GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
372 GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
374 GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
375 GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
376 GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
377 GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
378 GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
379 GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
380 #undef GEN_BENCH_ALL_TYPES
382 #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \
383 GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
384 GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
385 GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
386 GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
388 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
389 #undef GEN_BENCH_ALL_TYPES_NO_NEG
391 #undef GEN_BENCH_NO_NEG
394 #define GEN_BENCH_FUNCS(opname, op) \
396 [PREC_SINGLE] = bench_ ## opname ## _float, \
397 [PREC_DOUBLE] = bench_ ## opname ## _double, \
398 [PREC_FLOAT32] = bench_ ## opname ## _float32, \
399 [PREC_FLOAT64] = bench_ ## opname ## _float64, \
402 static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
403 GEN_BENCH_FUNCS(add, OP_ADD),
404 GEN_BENCH_FUNCS(sub, OP_SUB),
405 GEN_BENCH_FUNCS(mul, OP_MUL),
406 GEN_BENCH_FUNCS(div, OP_DIV),
407 GEN_BENCH_FUNCS(fma, OP_FMA),
408 GEN_BENCH_FUNCS(sqrt, OP_SQRT),
409 GEN_BENCH_FUNCS(cmp, OP_CMP),
412 #undef GEN_BENCH_FUNCS
414 static void run_bench(void)
418 f = bench_funcs[operation][precision];
/*
 * Return the index of the first entry of @arr equal to @name, or -1 when no
 * entry matches.
 *
 * @arr must be NULL-terminated
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *entry;

    for (entry = arr; *entry != NULL; entry++) {
        if (!strcmp(name, *entry)) {
            return entry - arr;
        }
    }
    return -1;
}
436 static void usage_complete(int argc, char *argv[])
438 gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
439 gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
441 fprintf(stderr, "Usage: %s [options]\n", argv[0]);
442 fprintf(stderr, "options:\n");
443 fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
444 DEFAULT_DURATION_SECS);
445 fprintf(stderr, " -h = show this help message.\n");
446 fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
447 op_list, op_names[0]);
448 fprintf(stderr, " -p = floating point precision (single, double). "
449 "Default: single\n");
450 fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
452 fprintf(stderr, " -t = tester (%s). Default: %s\n",
453 tester_list, tester_names[0]);
454 fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
455 "Default: disabled\n");
456 fprintf(stderr, " -Z = flush output to zero (soft tester only). "
457 "Default: disabled\n");
463 static int round_name_to_mode(const char *name)
467 for (i = 0; i < N_ROUND_MODES; i++) {
468 if (!strcmp(round_names[i], name)) {
475 static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
477 fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
478 round_names[rounding]);
482 static void set_host_precision(enum rounding rounding)
488 rhost = FE_TONEAREST;
491 rhost = FE_TOWARDZERO;
500 die_host_rounding(rounding);
503 g_assert_not_reached();
506 if (fesetround(rhost)) {
507 die_host_rounding(rounding);
511 static void set_soft_precision(enum rounding rounding)
517 mode = float_round_nearest_even;
520 mode = float_round_to_zero;
523 mode = float_round_down;
526 mode = float_round_up;
529 mode = float_round_ties_away;
532 g_assert_not_reached();
534 soft_status.float_rounding_mode = mode;
537 static void parse_args(int argc, char *argv[])
541 int rounding = ROUND_EVEN;
544 c = getopt(argc, argv, "d:ho:p:r:t:zZ");
550 duration = atoi(optarg);
553 usage_complete(argc, argv);
556 val = find_name(op_names, optarg);
558 fprintf(stderr, "Unsupported op '%s'\n", optarg);
564 if (!strcmp(optarg, "single")) {
565 precision = PREC_SINGLE;
566 } else if (!strcmp(optarg, "double")) {
567 precision = PREC_DOUBLE;
569 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
574 rounding = round_name_to_mode(optarg);
576 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
581 val = find_name(tester_names, optarg);
583 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
589 soft_status.flush_inputs_to_zero = 1;
592 soft_status.flush_to_zero = 1;
597 /* set precision and rounding mode based on the tester */
600 set_host_precision(rounding);
603 set_soft_precision(rounding);
606 precision = PREC_FLOAT32;
609 precision = PREC_FLOAT64;
612 g_assert_not_reached();
616 g_assert_not_reached();
620 static void pr_stats(void)
622 printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
625 int main(int argc, char *argv[])
627 parse_args(argc, argv);