]>
Commit | Line | Data |
---|---|---|
25f539f3 EC |
1 | /* |
2 | * fp-bench.c - A collection of simple floating point microbenchmarks. | |
3 | * | |
4 | * Copyright (C) 2018, Emilio G. Cota <[email protected]> | |
5 | * | |
6 | * License: GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
9 | #ifndef HW_POISON_H | |
10 | #error Must define HW_POISON_H to work around TARGET_* poisoning | |
11 | #endif | |
12 | ||
13 | #include "qemu/osdep.h" | |
14 | #include <math.h> | |
15 | #include <fenv.h> | |
16 | #include "qemu/timer.h" | |
f2b84b9e | 17 | #include "qemu/int128.h" |
25f539f3 EC |
18 | #include "fpu/softfloat.h" |
19 | ||
/*
 * Number of operations timed per batch; a large batch amortizes the
 * computation of random inputs.
 */
#define OPS_PER_ITER    50000

/* Capacity of the operand arrays; OP_FMA is generated with 3 operands. */
#define MAX_OPERANDS    3

/* Initial values of the per-operand pseudo-random streams. */
#define SEED_A  0xdeadfacedeadface
#define SEED_B  0xbadc0feebadc0fee
#define SEED_C  0xbeefdeadbeefdead
28 | ||
/*
 * Operations that can be benchmarked. The values are used as indices
 * into op_names[] and into the first dimension of bench_funcs[].
 */
enum op {
    OP_ADD = 0,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_FMA,
    OP_SQRT,
    OP_CMP,
    OP_MAX_NR,  /* number of operations; also the sentinel slot below */
};

/*
 * Command-line spelling of each operation. The extra OP_MAX_NR entry
 * NULL-terminates the array so it can be handed to find_name() and
 * g_strjoinv().
 */
static const char * const op_names[] = {
    [OP_ADD]    = "add",
    [OP_SUB]    = "sub",
    [OP_MUL]    = "mul",
    [OP_DIV]    = "div",
    [OP_FMA]    = "mulAdd",
    [OP_SQRT]   = "sqrt",
    [OP_CMP]    = "cmp",
    [OP_MAX_NR] = NULL,
};
50 | ||
/*
 * Operand formats. PREC_SINGLE/DOUBLE/QUAD are what the -p option selects;
 * when the soft tester is in use parse_args() maps them to the matching
 * PREC_FLOAT32/64/128 value. The values index the second dimension of
 * bench_funcs[].
 */
enum precision {
    PREC_SINGLE = 0,
    PREC_DOUBLE,
    PREC_QUAD,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_FLOAT128,
    PREC_MAX_NR,    /* number of precisions */
};
60 | ||
/* Rounding modes selectable with -r; the values index round_names[]. */
enum rounding {
    ROUND_EVEN = 0,
    ROUND_ZERO,
    ROUND_DOWN,
    ROUND_UP,
    ROUND_TIEAWAY,
    N_ROUND_MODES,  /* number of rounding modes */
};

/*
 * Command-line spelling of each rounding mode. Unlike op_names[] this
 * array has no NULL terminator; it is walked up to N_ROUND_MODES.
 */
static const char * const round_names[] = {
    [ROUND_EVEN]    = "even",
    [ROUND_ZERO]    = "zero",
    [ROUND_DOWN]    = "down",
    [ROUND_UP]      = "up",
    [ROUND_TIEAWAY] = "tieaway",
};
77 | ||
/* Implementations that can be benchmarked; selected with -t. */
enum tester {
    TESTER_SOFT = 0,
    TESTER_HOST,
    TESTER_MAX_NR,  /* number of testers; also the sentinel slot below */
};

/*
 * Command-line spelling of each tester, NULL-terminated so the array can
 * be handed to find_name() and g_strjoinv().
 */
static const char * const tester_names[] = {
    [TESTER_SOFT]   = "soft",
    [TESTER_HOST]   = "host",
    [TESTER_MAX_NR] = NULL,
};
89 | ||
90 | union fp { | |
91 | float f; | |
92 | double d; | |
93 | float32 f32; | |
94 | float64 f64; | |
f2b84b9e | 95 | float128 f128; |
25f539f3 EC |
96 | uint64_t u64; |
97 | }; | |
98 | ||
/*
 * Helper types. Apart from bench_func_t, nothing declared here is
 * referenced by the rest of the visible file.
 */
struct op_state;

typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Signature of the generated per-operation, per-precision benchmarks. */
typedef void (*bench_func_t)(void);

struct op_desc {
    const char * const name;
};
114 | ||
115 | #define DEFAULT_DURATION_SECS 1 | |
116 | ||
117 | static uint64_t random_ops[MAX_OPERANDS] = { | |
118 | SEED_A, SEED_B, SEED_C, | |
119 | }; | |
f2b84b9e AB |
120 | |
121 | static float128 random_quad_ops[MAX_OPERANDS] = { | |
122 | {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A}, | |
123 | }; | |
25f539f3 EC |
124 | static float_status soft_status; |
125 | static enum precision precision; | |
126 | static enum op operation; | |
127 | static enum tester tester; | |
128 | static uint64_t n_completed_ops; | |
129 | static unsigned int duration = DEFAULT_DURATION_SECS; | |
130 | static int64_t ns_elapsed; | |
131 | /* disable optimizations with volatile */ | |
132 | static volatile union fp res; | |
133 | ||
/*
 * One step of the xorshift64* generator, see
 * https://en.wikipedia.org/wiki/Xorshift
 *
 * Preferred over rand_r() because it is faster and yields a wider range
 * (RAND_MAX is only guaranteed to be >= INT_MAX).
 *
 * @x: current generator state.
 * Returns the next 64-bit value; callers feed it back in as the new state.
 */
static uint64_t xorshift64star(uint64_t x)
{
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return UINT64_C(2685821657736338717) * state;
}
146 | ||
147 | static void update_random_ops(int n_ops, enum precision prec) | |
148 | { | |
149 | int i; | |
150 | ||
151 | for (i = 0; i < n_ops; i++) { | |
25f539f3 | 152 | |
446cfb0d EC |
153 | switch (prec) { |
154 | case PREC_SINGLE: | |
155 | case PREC_FLOAT32: | |
f2b84b9e AB |
156 | { |
157 | uint64_t r = random_ops[i]; | |
25f539f3 EC |
158 | do { |
159 | r = xorshift64star(r); | |
160 | } while (!float32_is_normal(r)); | |
f2b84b9e | 161 | random_ops[i] = r; |
446cfb0d | 162 | break; |
f2b84b9e | 163 | } |
446cfb0d EC |
164 | case PREC_DOUBLE: |
165 | case PREC_FLOAT64: | |
f2b84b9e AB |
166 | { |
167 | uint64_t r = random_ops[i]; | |
25f539f3 EC |
168 | do { |
169 | r = xorshift64star(r); | |
170 | } while (!float64_is_normal(r)); | |
f2b84b9e AB |
171 | random_ops[i] = r; |
172 | break; | |
173 | } | |
174 | case PREC_QUAD: | |
175 | case PREC_FLOAT128: | |
176 | { | |
177 | float128 r = random_quad_ops[i]; | |
178 | uint64_t hi = r.high; | |
179 | uint64_t lo = r.low; | |
180 | do { | |
181 | hi = xorshift64star(hi); | |
182 | lo = xorshift64star(lo); | |
183 | r = make_float128(hi, lo); | |
184 | } while (!float128_is_normal(r)); | |
185 | random_quad_ops[i] = r; | |
446cfb0d | 186 | break; |
f2b84b9e | 187 | } |
446cfb0d | 188 | default: |
25f539f3 EC |
189 | g_assert_not_reached(); |
190 | } | |
25f539f3 EC |
191 | } |
192 | } | |
193 | ||
194 | static void fill_random(union fp *ops, int n_ops, enum precision prec, | |
195 | bool no_neg) | |
196 | { | |
197 | int i; | |
198 | ||
199 | for (i = 0; i < n_ops; i++) { | |
200 | switch (prec) { | |
201 | case PREC_SINGLE: | |
202 | case PREC_FLOAT32: | |
203 | ops[i].f32 = make_float32(random_ops[i]); | |
204 | if (no_neg && float32_is_neg(ops[i].f32)) { | |
205 | ops[i].f32 = float32_chs(ops[i].f32); | |
206 | } | |
25f539f3 EC |
207 | break; |
208 | case PREC_DOUBLE: | |
209 | case PREC_FLOAT64: | |
210 | ops[i].f64 = make_float64(random_ops[i]); | |
211 | if (no_neg && float64_is_neg(ops[i].f64)) { | |
212 | ops[i].f64 = float64_chs(ops[i].f64); | |
213 | } | |
25f539f3 | 214 | break; |
f2b84b9e AB |
215 | case PREC_QUAD: |
216 | case PREC_FLOAT128: | |
217 | ops[i].f128 = random_quad_ops[i]; | |
218 | if (no_neg && float128_is_neg(ops[i].f128)) { | |
219 | ops[i].f128 = float128_chs(ops[i].f128); | |
220 | } | |
221 | break; | |
25f539f3 EC |
222 | default: |
223 | g_assert_not_reached(); | |
224 | } | |
225 | } | |
226 | } | |
227 | ||
228 | /* | |
229 | * The main benchmark function. Instead of (ab)using macros, we rely | |
230 | * on the compiler to unfold this at compile-time. | |
231 | */ | |
232 | static void bench(enum precision prec, enum op op, int n_ops, bool no_neg) | |
233 | { | |
234 | int64_t tf = get_clock() + duration * 1000000000LL; | |
235 | ||
236 | while (get_clock() < tf) { | |
237 | union fp ops[MAX_OPERANDS]; | |
238 | int64_t t0; | |
239 | int i; | |
240 | ||
241 | update_random_ops(n_ops, prec); | |
242 | switch (prec) { | |
243 | case PREC_SINGLE: | |
244 | fill_random(ops, n_ops, prec, no_neg); | |
245 | t0 = get_clock(); | |
246 | for (i = 0; i < OPS_PER_ITER; i++) { | |
247 | float a = ops[0].f; | |
248 | float b = ops[1].f; | |
249 | float c = ops[2].f; | |
250 | ||
251 | switch (op) { | |
252 | case OP_ADD: | |
253 | res.f = a + b; | |
254 | break; | |
255 | case OP_SUB: | |
256 | res.f = a - b; | |
257 | break; | |
258 | case OP_MUL: | |
259 | res.f = a * b; | |
260 | break; | |
261 | case OP_DIV: | |
262 | res.f = a / b; | |
263 | break; | |
264 | case OP_FMA: | |
265 | res.f = fmaf(a, b, c); | |
266 | break; | |
267 | case OP_SQRT: | |
268 | res.f = sqrtf(a); | |
269 | break; | |
270 | case OP_CMP: | |
271 | res.u64 = isgreater(a, b); | |
272 | break; | |
273 | default: | |
274 | g_assert_not_reached(); | |
275 | } | |
276 | } | |
277 | break; | |
278 | case PREC_DOUBLE: | |
279 | fill_random(ops, n_ops, prec, no_neg); | |
280 | t0 = get_clock(); | |
281 | for (i = 0; i < OPS_PER_ITER; i++) { | |
282 | double a = ops[0].d; | |
283 | double b = ops[1].d; | |
284 | double c = ops[2].d; | |
285 | ||
286 | switch (op) { | |
287 | case OP_ADD: | |
288 | res.d = a + b; | |
289 | break; | |
290 | case OP_SUB: | |
291 | res.d = a - b; | |
292 | break; | |
293 | case OP_MUL: | |
294 | res.d = a * b; | |
295 | break; | |
296 | case OP_DIV: | |
297 | res.d = a / b; | |
298 | break; | |
299 | case OP_FMA: | |
300 | res.d = fma(a, b, c); | |
301 | break; | |
302 | case OP_SQRT: | |
303 | res.d = sqrt(a); | |
304 | break; | |
305 | case OP_CMP: | |
306 | res.u64 = isgreater(a, b); | |
307 | break; | |
308 | default: | |
309 | g_assert_not_reached(); | |
310 | } | |
311 | } | |
312 | break; | |
313 | case PREC_FLOAT32: | |
314 | fill_random(ops, n_ops, prec, no_neg); | |
315 | t0 = get_clock(); | |
316 | for (i = 0; i < OPS_PER_ITER; i++) { | |
317 | float32 a = ops[0].f32; | |
318 | float32 b = ops[1].f32; | |
319 | float32 c = ops[2].f32; | |
320 | ||
321 | switch (op) { | |
322 | case OP_ADD: | |
323 | res.f32 = float32_add(a, b, &soft_status); | |
324 | break; | |
325 | case OP_SUB: | |
326 | res.f32 = float32_sub(a, b, &soft_status); | |
327 | break; | |
328 | case OP_MUL: | |
329 | res.f = float32_mul(a, b, &soft_status); | |
330 | break; | |
331 | case OP_DIV: | |
332 | res.f32 = float32_div(a, b, &soft_status); | |
333 | break; | |
334 | case OP_FMA: | |
335 | res.f32 = float32_muladd(a, b, c, 0, &soft_status); | |
336 | break; | |
337 | case OP_SQRT: | |
338 | res.f32 = float32_sqrt(a, &soft_status); | |
339 | break; | |
340 | case OP_CMP: | |
341 | res.u64 = float32_compare_quiet(a, b, &soft_status); | |
342 | break; | |
343 | default: | |
344 | g_assert_not_reached(); | |
345 | } | |
346 | } | |
347 | break; | |
348 | case PREC_FLOAT64: | |
349 | fill_random(ops, n_ops, prec, no_neg); | |
350 | t0 = get_clock(); | |
351 | for (i = 0; i < OPS_PER_ITER; i++) { | |
352 | float64 a = ops[0].f64; | |
353 | float64 b = ops[1].f64; | |
354 | float64 c = ops[2].f64; | |
355 | ||
356 | switch (op) { | |
357 | case OP_ADD: | |
358 | res.f64 = float64_add(a, b, &soft_status); | |
359 | break; | |
360 | case OP_SUB: | |
361 | res.f64 = float64_sub(a, b, &soft_status); | |
362 | break; | |
363 | case OP_MUL: | |
364 | res.f = float64_mul(a, b, &soft_status); | |
365 | break; | |
366 | case OP_DIV: | |
367 | res.f64 = float64_div(a, b, &soft_status); | |
368 | break; | |
369 | case OP_FMA: | |
370 | res.f64 = float64_muladd(a, b, c, 0, &soft_status); | |
371 | break; | |
372 | case OP_SQRT: | |
373 | res.f64 = float64_sqrt(a, &soft_status); | |
374 | break; | |
375 | case OP_CMP: | |
376 | res.u64 = float64_compare_quiet(a, b, &soft_status); | |
377 | break; | |
378 | default: | |
379 | g_assert_not_reached(); | |
380 | } | |
381 | } | |
382 | break; | |
f2b84b9e AB |
383 | case PREC_FLOAT128: |
384 | fill_random(ops, n_ops, prec, no_neg); | |
385 | t0 = get_clock(); | |
386 | for (i = 0; i < OPS_PER_ITER; i++) { | |
387 | float128 a = ops[0].f128; | |
388 | float128 b = ops[1].f128; | |
dedd123c | 389 | float128 c = ops[2].f128; |
f2b84b9e AB |
390 | |
391 | switch (op) { | |
392 | case OP_ADD: | |
393 | res.f128 = float128_add(a, b, &soft_status); | |
394 | break; | |
395 | case OP_SUB: | |
396 | res.f128 = float128_sub(a, b, &soft_status); | |
397 | break; | |
398 | case OP_MUL: | |
399 | res.f128 = float128_mul(a, b, &soft_status); | |
400 | break; | |
401 | case OP_DIV: | |
402 | res.f128 = float128_div(a, b, &soft_status); | |
403 | break; | |
dedd123c RH |
404 | case OP_FMA: |
405 | res.f128 = float128_muladd(a, b, c, 0, &soft_status); | |
406 | break; | |
f2b84b9e AB |
407 | case OP_SQRT: |
408 | res.f128 = float128_sqrt(a, &soft_status); | |
409 | break; | |
410 | case OP_CMP: | |
411 | res.u64 = float128_compare_quiet(a, b, &soft_status); | |
412 | break; | |
413 | default: | |
414 | g_assert_not_reached(); | |
415 | } | |
416 | } | |
417 | break; | |
25f539f3 EC |
418 | default: |
419 | g_assert_not_reached(); | |
420 | } | |
421 | ns_elapsed += get_clock() - t0; | |
422 | n_completed_ops += OPS_PER_ITER; | |
423 | } | |
424 | } | |
425 | ||
426 | #define GEN_BENCH(name, type, prec, op, n_ops) \ | |
427 | static void __attribute__((flatten)) name(void) \ | |
428 | { \ | |
429 | bench(prec, op, n_ops, false); \ | |
430 | } | |
431 | ||
432 | #define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \ | |
433 | static void __attribute__((flatten)) name(void) \ | |
434 | { \ | |
435 | bench(prec, op, n_ops, true); \ | |
436 | } | |
437 | ||
438 | #define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \ | |
439 | GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \ | |
440 | GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \ | |
441 | GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \ | |
f2b84b9e AB |
442 | GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \ |
443 | GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops) | |
25f539f3 EC |
444 | |
445 | GEN_BENCH_ALL_TYPES(add, OP_ADD, 2) | |
446 | GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2) | |
447 | GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2) | |
448 | GEN_BENCH_ALL_TYPES(div, OP_DIV, 2) | |
449 | GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3) | |
450 | GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2) | |
451 | #undef GEN_BENCH_ALL_TYPES | |
452 | ||
453 | #define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \ | |
454 | GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \ | |
455 | GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \ | |
456 | GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \ | |
f2b84b9e AB |
457 | GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \ |
458 | GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n) | |
25f539f3 EC |
459 | |
460 | GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1) | |
461 | #undef GEN_BENCH_ALL_TYPES_NO_NEG | |
462 | ||
463 | #undef GEN_BENCH_NO_NEG | |
464 | #undef GEN_BENCH | |
465 | ||
466 | #define GEN_BENCH_FUNCS(opname, op) \ | |
467 | [op] = { \ | |
468 | [PREC_SINGLE] = bench_ ## opname ## _float, \ | |
469 | [PREC_DOUBLE] = bench_ ## opname ## _double, \ | |
470 | [PREC_FLOAT32] = bench_ ## opname ## _float32, \ | |
471 | [PREC_FLOAT64] = bench_ ## opname ## _float64, \ | |
f2b84b9e | 472 | [PREC_FLOAT128] = bench_ ## opname ## _float128, \ |
25f539f3 EC |
473 | } |
474 | ||
475 | static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = { | |
476 | GEN_BENCH_FUNCS(add, OP_ADD), | |
477 | GEN_BENCH_FUNCS(sub, OP_SUB), | |
478 | GEN_BENCH_FUNCS(mul, OP_MUL), | |
479 | GEN_BENCH_FUNCS(div, OP_DIV), | |
480 | GEN_BENCH_FUNCS(fma, OP_FMA), | |
481 | GEN_BENCH_FUNCS(sqrt, OP_SQRT), | |
482 | GEN_BENCH_FUNCS(cmp, OP_CMP), | |
483 | }; | |
484 | ||
485 | #undef GEN_BENCH_FUNCS | |
486 | ||
487 | static void run_bench(void) | |
488 | { | |
489 | bench_func_t f; | |
490 | ||
491 | f = bench_funcs[operation][precision]; | |
492 | g_assert(f); | |
493 | f(); | |
494 | } | |
495 | ||
/*
 * Look up @name in @arr, which must be NULL-terminated.
 *
 * Returns the index of the first exact match, or -1 if there is none.
 */
static int find_name(const char * const *arr, const char *name)
{
    int pos = 0;

    while (arr[pos] != NULL) {
        if (!strcmp(name, arr[pos])) {
            return pos;
        }
        pos++;
    }
    return -1;
}
508 | ||
509 | static void usage_complete(int argc, char *argv[]) | |
510 | { | |
511 | gchar *op_list = g_strjoinv(", ", (gchar **)op_names); | |
512 | gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names); | |
513 | ||
514 | fprintf(stderr, "Usage: %s [options]\n", argv[0]); | |
515 | fprintf(stderr, "options:\n"); | |
516 | fprintf(stderr, " -d = duration, in seconds. Default: %d\n", | |
517 | DEFAULT_DURATION_SECS); | |
518 | fprintf(stderr, " -h = show this help message.\n"); | |
519 | fprintf(stderr, " -o = floating point operation (%s). Default: %s\n", | |
520 | op_list, op_names[0]); | |
f2b84b9e | 521 | fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). " |
25f539f3 EC |
522 | "Default: single\n"); |
523 | fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). " | |
524 | "Default: even\n"); | |
525 | fprintf(stderr, " -t = tester (%s). Default: %s\n", | |
526 | tester_list, tester_names[0]); | |
527 | fprintf(stderr, " -z = flush inputs to zero (soft tester only). " | |
528 | "Default: disabled\n"); | |
529 | fprintf(stderr, " -Z = flush output to zero (soft tester only). " | |
530 | "Default: disabled\n"); | |
531 | ||
532 | g_free(tester_list); | |
533 | g_free(op_list); | |
534 | } | |
535 | ||
536 | static int round_name_to_mode(const char *name) | |
537 | { | |
538 | int i; | |
539 | ||
540 | for (i = 0; i < N_ROUND_MODES; i++) { | |
541 | if (!strcmp(round_names[i], name)) { | |
542 | return i; | |
543 | } | |
544 | } | |
545 | return -1; | |
546 | } | |
547 | ||
8905770b MAL |
548 | static G_NORETURN |
549 | void die_host_rounding(enum rounding rounding) | |
25f539f3 EC |
550 | { |
551 | fprintf(stderr, "fatal: '%s' rounding not supported on this host\n", | |
552 | round_names[rounding]); | |
553 | exit(EXIT_FAILURE); | |
554 | } | |
555 | ||
556 | static void set_host_precision(enum rounding rounding) | |
557 | { | |
558 | int rhost; | |
559 | ||
560 | switch (rounding) { | |
561 | case ROUND_EVEN: | |
562 | rhost = FE_TONEAREST; | |
563 | break; | |
564 | case ROUND_ZERO: | |
565 | rhost = FE_TOWARDZERO; | |
566 | break; | |
567 | case ROUND_DOWN: | |
568 | rhost = FE_DOWNWARD; | |
569 | break; | |
570 | case ROUND_UP: | |
571 | rhost = FE_UPWARD; | |
572 | break; | |
573 | case ROUND_TIEAWAY: | |
574 | die_host_rounding(rounding); | |
575 | return; | |
576 | default: | |
577 | g_assert_not_reached(); | |
578 | } | |
579 | ||
580 | if (fesetround(rhost)) { | |
581 | die_host_rounding(rounding); | |
582 | } | |
583 | } | |
584 | ||
585 | static void set_soft_precision(enum rounding rounding) | |
586 | { | |
587 | signed char mode; | |
588 | ||
589 | switch (rounding) { | |
590 | case ROUND_EVEN: | |
591 | mode = float_round_nearest_even; | |
592 | break; | |
593 | case ROUND_ZERO: | |
594 | mode = float_round_to_zero; | |
595 | break; | |
596 | case ROUND_DOWN: | |
597 | mode = float_round_down; | |
598 | break; | |
599 | case ROUND_UP: | |
600 | mode = float_round_up; | |
601 | break; | |
602 | case ROUND_TIEAWAY: | |
603 | mode = float_round_ties_away; | |
604 | break; | |
605 | default: | |
606 | g_assert_not_reached(); | |
607 | } | |
608 | soft_status.float_rounding_mode = mode; | |
609 | } | |
610 | ||
611 | static void parse_args(int argc, char *argv[]) | |
612 | { | |
613 | int c; | |
614 | int val; | |
615 | int rounding = ROUND_EVEN; | |
616 | ||
617 | for (;;) { | |
618 | c = getopt(argc, argv, "d:ho:p:r:t:zZ"); | |
619 | if (c < 0) { | |
620 | break; | |
621 | } | |
622 | switch (c) { | |
623 | case 'd': | |
624 | duration = atoi(optarg); | |
625 | break; | |
626 | case 'h': | |
627 | usage_complete(argc, argv); | |
628 | exit(EXIT_SUCCESS); | |
629 | case 'o': | |
630 | val = find_name(op_names, optarg); | |
631 | if (val < 0) { | |
632 | fprintf(stderr, "Unsupported op '%s'\n", optarg); | |
633 | exit(EXIT_FAILURE); | |
634 | } | |
635 | operation = val; | |
636 | break; | |
637 | case 'p': | |
638 | if (!strcmp(optarg, "single")) { | |
639 | precision = PREC_SINGLE; | |
640 | } else if (!strcmp(optarg, "double")) { | |
641 | precision = PREC_DOUBLE; | |
f2b84b9e AB |
642 | } else if (!strcmp(optarg, "quad")) { |
643 | precision = PREC_QUAD; | |
25f539f3 EC |
644 | } else { |
645 | fprintf(stderr, "Unsupported precision '%s'\n", optarg); | |
646 | exit(EXIT_FAILURE); | |
647 | } | |
648 | break; | |
649 | case 'r': | |
650 | rounding = round_name_to_mode(optarg); | |
651 | if (rounding < 0) { | |
652 | fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg); | |
653 | exit(EXIT_FAILURE); | |
654 | } | |
655 | break; | |
656 | case 't': | |
657 | val = find_name(tester_names, optarg); | |
658 | if (val < 0) { | |
659 | fprintf(stderr, "Unsupported tester '%s'\n", optarg); | |
660 | exit(EXIT_FAILURE); | |
661 | } | |
662 | tester = val; | |
663 | break; | |
664 | case 'z': | |
665 | soft_status.flush_inputs_to_zero = 1; | |
666 | break; | |
667 | case 'Z': | |
668 | soft_status.flush_to_zero = 1; | |
669 | break; | |
670 | } | |
671 | } | |
672 | ||
673 | /* set precision and rounding mode based on the tester */ | |
674 | switch (tester) { | |
675 | case TESTER_HOST: | |
676 | set_host_precision(rounding); | |
677 | break; | |
678 | case TESTER_SOFT: | |
679 | set_soft_precision(rounding); | |
680 | switch (precision) { | |
681 | case PREC_SINGLE: | |
682 | precision = PREC_FLOAT32; | |
683 | break; | |
684 | case PREC_DOUBLE: | |
685 | precision = PREC_FLOAT64; | |
686 | break; | |
f2b84b9e AB |
687 | case PREC_QUAD: |
688 | precision = PREC_FLOAT128; | |
689 | break; | |
25f539f3 EC |
690 | default: |
691 | g_assert_not_reached(); | |
692 | } | |
693 | break; | |
694 | default: | |
695 | g_assert_not_reached(); | |
696 | } | |
697 | } | |
698 | ||
699 | static void pr_stats(void) | |
700 | { | |
701 | printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3); | |
702 | } | |
703 | ||
/*
 * Entry point: configure from the command line, run the selected
 * benchmark, then report the throughput.
 */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);

    run_bench();
    pr_stats();

    return 0;
}