Git Repo - qemu.git/blame - tests/fp/fp-bench.c

Commit	Line	Data
25f539f3 EC	1	/*
	2	* fp-bench.c - A collection of simple floating point microbenchmarks.
	3	*
	4	* Copyright (C) 2018, Emilio G. Cota <[email protected]>
	5	*
	6	* License: GNU GPL, version 2 or later.
	7	* See the COPYING file in the top-level directory.
	8	*/
	9	#ifndef HW_POISON_H
	10	#error Must define HW_POISON_H to work around TARGET_* poisoning
	11	#endif
	12
	13	#include "qemu/osdep.h"
	14	#include <math.h>
	15	#include <fenv.h>
	16	#include "qemu/timer.h"
f2b84b9e	17	#include "qemu/int128.h"
25f539f3 EC	18	#include "fpu/softfloat.h"
	19
	20	/* amortize the computation of random inputs */
	21	#define OPS_PER_ITER 50000
	22
	23	#define MAX_OPERANDS 3
	24
	25	#define SEED_A 0xdeadfacedeadface
	26	#define SEED_B 0xbadc0feebadc0fee
	27	#define SEED_C 0xbeefdeadbeefdead
	28
	29	enum op {
	30	OP_ADD,
	31	OP_SUB,
	32	OP_MUL,
	33	OP_DIV,
	34	OP_FMA,
	35	OP_SQRT,
	36	OP_CMP,
	37	OP_MAX_NR,
	38	};
	39
/*
 * Command-line names of the operations, indexed by enum op. The extra
 * [OP_MAX_NR] = NULL entry terminates the array so that it can be walked by
 * find_name() and joined by g_strjoinv().
 */
static const char * const op_names[] = {
    [OP_ADD] = "add",
    [OP_SUB] = "sub",
    [OP_MUL] = "mul",
    [OP_DIV] = "div",
    [OP_FMA] = "mulAdd",
    [OP_SQRT] = "sqrt",
    [OP_CMP] = "cmp",
    [OP_MAX_NR] = NULL,
};
	50
	51	enum precision {
	52	PREC_SINGLE,
	53	PREC_DOUBLE,
f2b84b9e	54	PREC_QUAD,
25f539f3 EC	55	PREC_FLOAT32,
25f539f3 EC	56	PREC_FLOAT64,
f2b84b9e	57	PREC_FLOAT128,
25f539f3 EC	58	PREC_MAX_NR,
	59	};
	60
	61	enum rounding {
	62	ROUND_EVEN,
	63	ROUND_ZERO,
	64	ROUND_DOWN,
	65	ROUND_UP,
	66	ROUND_TIEAWAY,
	67	N_ROUND_MODES,
	68	};
	69
/*
 * Command-line names of the rounding modes, indexed by enum rounding.
 * Unlike op_names[] and tester_names[] this array is not NULL-terminated;
 * users must bound their walk with N_ROUND_MODES.
 */
static const char * const round_names[] = {
    [ROUND_EVEN] = "even",
    [ROUND_ZERO] = "zero",
    [ROUND_DOWN] = "down",
    [ROUND_UP] = "up",
    [ROUND_TIEAWAY] = "tieaway",
};
	77
	78	enum tester {
	79	TESTER_SOFT,
	80	TESTER_HOST,
	81	TESTER_MAX_NR,
	82	};
	83
/*
 * Command-line names of the testers, indexed by enum tester. The extra
 * [TESTER_MAX_NR] = NULL entry terminates the array for find_name() and
 * g_strjoinv().
 */
static const char * const tester_names[] = {
    [TESTER_SOFT] = "soft",
    [TESTER_HOST] = "host",
    [TESTER_MAX_NR] = NULL,
};
	89
/*
 * One operand or result, viewable in every format the benchmark handles.
 * The member that is read or written is chosen by the precision being
 * benchmarked; u64 is used for the integer results of comparisons.
 */
union fp {
    float f;        /* host single precision */
    double d;       /* host double precision */
    float32 f32;    /* softfloat single precision */
    float64 f64;    /* softfloat double precision */
    float128 f128;  /* softfloat quad precision */
    uint64_t u64;   /* raw integer view */
};
	98
	99	struct op_state;
	100
	101	typedef float (float_func_t)(const struct op_state s);
	102	typedef double (double_func_t)(const struct op_state s);
	103
	104	union fp_func {
	105	float_func_t float_func;
	106	double_func_t double_func;
	107	};
	108
/* Signature of the generated bench_<op>_<type>() wrappers in bench_funcs[]. */
typedef void (*bench_func_t)(void);
	110
/* Descriptor holding an operation's name. */
struct op_desc {
    const char * const name;
};
	114
/* Benchmark run time, in seconds, when -d is not given. */
#define DEFAULT_DURATION_SECS 1

/*
 * Per-operand random state for 32- and 64-bit operands. Advanced by
 * update_random_ops() and converted to operands by fill_random().
 */
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};

/* Per-operand random state for 128-bit operands, seeded from SEED_* pairs. */
static float128 random_quad_ops[MAX_OPERANDS] = {
    {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
};
/* softfloat status (rounding mode, flush flags) handed to every soft op */
static float_status soft_status;
/*
 * Benchmark selection, filled in by parse_args(). Being zero-initialized,
 * the defaults are the first value of each enum.
 */
static enum precision precision;
static enum op operation;
static enum tester tester;
/* running total of timed operations, updated by bench() */
static uint64_t n_completed_ops;
/* requested run time in seconds (-d) */
static unsigned int duration = DEFAULT_DURATION_SECS;
/* running total of time spent inside the timed loops of bench() */
static int64_t ns_elapsed;
/*
 * Sink for every benchmark result. It is volatile so that the compiler
 * cannot optimize away the computations whose results are stored here.
 */
static volatile union fp res;
	133
	134	/*
	135	* From: https://en.wikipedia.org/wiki/Xorshift
	136	* This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
	137	* guaranteed to be >= INT_MAX).
	138	*/
	139	static uint64_t xorshift64star(uint64_t x)
	140	{
	141	x ^= x >> 12; /* a */
	142	x ^= x << 25; /* b */
	143	x ^= x >> 27; /* c */
	144	return x * UINT64_C(2685821657736338717);
	145	}
	146
	147	static void update_random_ops(int n_ops, enum precision prec)
	148	{
	149	int i;
	150
	151	for (i = 0; i < n_ops; i++) {
25f539f3	152
446cfb0d EC	153	switch (prec) {
	154	case PREC_SINGLE:
	155	case PREC_FLOAT32:
f2b84b9e AB	156	{
f2b84b9e AB	157	uint64_t r = random_ops[i];
25f539f3 EC	158	do {
	159	r = xorshift64star(r);
	160	} while (!float32_is_normal(r));
f2b84b9e	161	random_ops[i] = r;
446cfb0d	162	break;
f2b84b9e	163	}
446cfb0d EC	164	case PREC_DOUBLE:
446cfb0d EC	165	case PREC_FLOAT64:
f2b84b9e AB	166	{
f2b84b9e AB	167	uint64_t r = random_ops[i];
25f539f3 EC	168	do {
	169	r = xorshift64star(r);
	170	} while (!float64_is_normal(r));
f2b84b9e AB	171	random_ops[i] = r;
	172	break;
	173	}
	174	case PREC_QUAD:
	175	case PREC_FLOAT128:
	176	{
	177	float128 r = random_quad_ops[i];
	178	uint64_t hi = r.high;
	179	uint64_t lo = r.low;
	180	do {
	181	hi = xorshift64star(hi);
	182	lo = xorshift64star(lo);
	183	r = make_float128(hi, lo);
	184	} while (!float128_is_normal(r));
	185	random_quad_ops[i] = r;
446cfb0d	186	break;
f2b84b9e	187	}
446cfb0d	188	default:
25f539f3 EC	189	g_assert_not_reached();
25f539f3 EC	190	}
25f539f3 EC	191	}
	192	}
	193
	194	static void fill_random(union fp *ops, int n_ops, enum precision prec,
	195	bool no_neg)
	196	{
	197	int i;
	198
	199	for (i = 0; i < n_ops; i++) {
	200	switch (prec) {
	201	case PREC_SINGLE:
	202	case PREC_FLOAT32:
	203	ops[i].f32 = make_float32(random_ops[i]);
	204	if (no_neg && float32_is_neg(ops[i].f32)) {
	205	ops[i].f32 = float32_chs(ops[i].f32);
	206	}
25f539f3 EC	207	break;
	208	case PREC_DOUBLE:
	209	case PREC_FLOAT64:
	210	ops[i].f64 = make_float64(random_ops[i]);
	211	if (no_neg && float64_is_neg(ops[i].f64)) {
	212	ops[i].f64 = float64_chs(ops[i].f64);
	213	}
25f539f3	214	break;
f2b84b9e AB	215	case PREC_QUAD:
	216	case PREC_FLOAT128:
	217	ops[i].f128 = random_quad_ops[i];
	218	if (no_neg && float128_is_neg(ops[i].f128)) {
	219	ops[i].f128 = float128_chs(ops[i].f128);
	220	}
	221	break;
25f539f3 EC	222	default:
	223	g_assert_not_reached();
	224	}
	225	}
	226	}
	227
	228	/*
	229	* The main benchmark function. Instead of (ab)using macros, we rely
	230	* on the compiler to unfold this at compile-time.
	231	*/
	232	static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
	233	{
	234	int64_t tf = get_clock() + duration * 1000000000LL;
	235
	236	while (get_clock() < tf) {
	237	union fp ops[MAX_OPERANDS];
	238	int64_t t0;
	239	int i;
	240
	241	update_random_ops(n_ops, prec);
	242	switch (prec) {
	243	case PREC_SINGLE:
	244	fill_random(ops, n_ops, prec, no_neg);
	245	t0 = get_clock();
	246	for (i = 0; i < OPS_PER_ITER; i++) {
	247	float a = ops[0].f;
	248	float b = ops[1].f;
	249	float c = ops[2].f;
	250
	251	switch (op) {
	252	case OP_ADD:
	253	res.f = a + b;
	254	break;
	255	case OP_SUB:
	256	res.f = a - b;
	257	break;
	258	case OP_MUL:
	259	res.f = a * b;
	260	break;
	261	case OP_DIV:
	262	res.f = a / b;
	263	break;
	264	case OP_FMA:
	265	res.f = fmaf(a, b, c);
	266	break;
	267	case OP_SQRT:
	268	res.f = sqrtf(a);
	269	break;
	270	case OP_CMP:
	271	res.u64 = isgreater(a, b);
	272	break;
	273	default:
	274	g_assert_not_reached();
	275	}
	276	}
	277	break;
	278	case PREC_DOUBLE:
	279	fill_random(ops, n_ops, prec, no_neg);
	280	t0 = get_clock();
	281	for (i = 0; i < OPS_PER_ITER; i++) {
	282	double a = ops[0].d;
	283	double b = ops[1].d;
	284	double c = ops[2].d;
	285
286	switch (op) {
287	case OP_ADD:
288	res.d = a + b;
289	break;
290	case OP_SUB:
291	res.d = a - b;
292	break;
293	case OP_MUL:
294	res.d = a * b;
295	break;
296	case OP_DIV:
297	res.d = a / b;
298	break;
299	case OP_FMA:
300	res.d = fma(a, b, c);
301	break;
302	case OP_SQRT:
303	res.d = sqrt(a);
304	break;
305	case OP_CMP:
306	res.u64 = isgreater(a, b);
307	break;
308	default:
309	g_assert_not_reached();
310	}
311	}
312	break;
313	case PREC_FLOAT32:
314	fill_random(ops, n_ops, prec, no_neg);
315	t0 = get_clock();
316	for (i = 0; i < OPS_PER_ITER; i++) {
317	float32 a = ops[0].f32;
318	float32 b = ops[1].f32;
319	float32 c = ops[2].f32;
320
321	switch (op) {
322	case OP_ADD:
323	res.f32 = float32_add(a, b, &soft_status);
324	break;
325	case OP_SUB:
326	res.f32 = float32_sub(a, b, &soft_status);
327	break;
328	case OP_MUL:
329	res.f = float32_mul(a, b, &soft_status);
330	break;
331	case OP_DIV:
332	res.f32 = float32_div(a, b, &soft_status);
333	break;
334	case OP_FMA:
335	res.f32 = float32_muladd(a, b, c, 0, &soft_status);
336	break;
337	case OP_SQRT:
338	res.f32 = float32_sqrt(a, &soft_status);
339	break;
340	case OP_CMP:
341	res.u64 = float32_compare_quiet(a, b, &soft_status);
342	break;
343	default:
344	g_assert_not_reached();
345	}
346	}
347	break;
348	case PREC_FLOAT64:
349	fill_random(ops, n_ops, prec, no_neg);
350	t0 = get_clock();
351	for (i = 0; i < OPS_PER_ITER; i++) {
352	float64 a = ops[0].f64;
353	float64 b = ops[1].f64;
354	float64 c = ops[2].f64;
355
356	switch (op) {
357	case OP_ADD:
358	res.f64 = float64_add(a, b, &soft_status);
359	break;
360	case OP_SUB:
361	res.f64 = float64_sub(a, b, &soft_status);
362	break;
363	case OP_MUL:
364	res.f = float64_mul(a, b, &soft_status);
365	break;
366	case OP_DIV:
367	res.f64 = float64_div(a, b, &soft_status);
368	break;
369	case OP_FMA:
370	res.f64 = float64_muladd(a, b, c, 0, &soft_status);
371	break;
372	case OP_SQRT:
373	res.f64 = float64_sqrt(a, &soft_status);
374	break;
375	case OP_CMP:
376	res.u64 = float64_compare_quiet(a, b, &soft_status);
377	break;
378	default:
379	g_assert_not_reached();
380	}
381	}
382	break;
f2b84b9e AB	383	case PREC_FLOAT128:
	384	fill_random(ops, n_ops, prec, no_neg);
	385	t0 = get_clock();
	386	for (i = 0; i < OPS_PER_ITER; i++) {
	387	float128 a = ops[0].f128;
	388	float128 b = ops[1].f128;
dedd123c	389	float128 c = ops[2].f128;
f2b84b9e AB	390
	391	switch (op) {
	392	case OP_ADD:
	393	res.f128 = float128_add(a, b, &soft_status);
	394	break;
	395	case OP_SUB:
	396	res.f128 = float128_sub(a, b, &soft_status);
	397	break;
	398	case OP_MUL:
	399	res.f128 = float128_mul(a, b, &soft_status);
	400	break;
	401	case OP_DIV:
	402	res.f128 = float128_div(a, b, &soft_status);
	403	break;
dedd123c RH	404	case OP_FMA:
	405	res.f128 = float128_muladd(a, b, c, 0, &soft_status);
	406	break;
f2b84b9e AB	407	case OP_SQRT:
	408	res.f128 = float128_sqrt(a, &soft_status);
	409	break;
	410	case OP_CMP:
	411	res.u64 = float128_compare_quiet(a, b, &soft_status);
	412	break;
	413	default:
	414	g_assert_not_reached();
	415	}
	416	}
	417	break;
25f539f3 EC	418	default:
	419	g_assert_not_reached();
	420	}
	421	ns_elapsed += get_clock() - t0;
	422	n_completed_ops += OPS_PER_ITER;
	423	}
	424	}
	425
/*
 * GEN_BENCH defines a no-argument wrapper @name that calls bench() with
 * constant arguments. The flatten attribute asks the compiler to inline
 * everything called from the wrapper so bench() can be specialized for
 * those constants. The @type argument is not used by the expansion.
 */
#define GEN_BENCH(name, type, prec, op, n_ops)                          \
    static void __attribute__((flatten)) name(void)                     \
    {                                                                   \
        bench(prec, op, n_ops, false);                                  \
    }

/* Same as GEN_BENCH, but requests non-negative operands from bench(). */
#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)                   \
    static void __attribute__((flatten)) name(void)                     \
    {                                                                   \
        bench(prec, op, n_ops, true);                                   \
    }

/*
 * Emit bench_<opname>_{float,double,float32,float64,float128} for one
 * operation. No wrapper is generated for PREC_QUAD.
 */
#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)

/* The last argument is the number of operands the operation consumes. */
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
#undef GEN_BENCH_ALL_TYPES

/* As GEN_BENCH_ALL_TYPES, but built on GEN_BENCH_NO_NEG. */
#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)

/* sqrt is the only operation benchmarked on non-negative inputs only. */
GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG

#undef GEN_BENCH_NO_NEG
#undef GEN_BENCH
	465
/*
 * Expands to the bench_funcs[] row for one operation, pointing each
 * precision at the wrapper generated for it above.
 */
#define GEN_BENCH_FUNCS(opname, op)                             \
    [op] = {                                                    \
        [PREC_SINGLE] = bench_ ## opname ## _float,             \
        [PREC_DOUBLE] = bench_ ## opname ## _double,            \
        [PREC_FLOAT32] = bench_ ## opname ## _float32,          \
        [PREC_FLOAT64] = bench_ ## opname ## _float64,          \
        [PREC_FLOAT128] = bench_ ## opname ## _float128,        \
    }

/*
 * Dispatch table: bench_funcs[operation][precision] is the wrapper to run.
 * The [PREC_QUAD] column is never initialized and therefore stays NULL.
 */
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    GEN_BENCH_FUNCS(add, OP_ADD),
    GEN_BENCH_FUNCS(sub, OP_SUB),
    GEN_BENCH_FUNCS(mul, OP_MUL),
    GEN_BENCH_FUNCS(div, OP_DIV),
    GEN_BENCH_FUNCS(fma, OP_FMA),
    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    GEN_BENCH_FUNCS(cmp, OP_CMP),
};

#undef GEN_BENCH_FUNCS
	486
	487	static void run_bench(void)
	488	{
	489	bench_func_t f;
	490
	491	f = bench_funcs[operation][precision];
	492	g_assert(f);
	493	f();
	494	}
	495
	496	/* @arr must be NULL-terminated */
	497	static int find_name(const char * const arr, const char name)
	498	{
	499	int i;
	500
	501	for (i = 0; arr[i] != NULL; i++) {
	502	if (strcmp(name, arr[i]) == 0) {
	503	return i;
	504	}
	505	}
	506	return -1;
	507	}
	508
	509	static void usage_complete(int argc, char *argv[])
	510	{
	511	gchar op_list = g_strjoinv(", ", (gchar *)op_names);
	512	gchar tester_list = g_strjoinv(", ", (gchar *)tester_names);
	513
	514	fprintf(stderr, "Usage: %s [options]\n", argv[0]);
	515	fprintf(stderr, "options:\n");
	516	fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
	517	DEFAULT_DURATION_SECS);
	518	fprintf(stderr, " -h = show this help message.\n");
	519	fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
	520	op_list, op_names[0]);
f2b84b9e	521	fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
25f539f3 EC	522	"Default: single\n");
	523	fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
	524	"Default: even\n");
	525	fprintf(stderr, " -t = tester (%s). Default: %s\n",
	526	tester_list, tester_names[0]);
	527	fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
	528	"Default: disabled\n");
	529	fprintf(stderr, " -Z = flush output to zero (soft tester only). "
	530	"Default: disabled\n");
	531
	532	g_free(tester_list);
	533	g_free(op_list);
	534	}
	535
	536	static int round_name_to_mode(const char *name)
	537	{
	538	int i;
	539
	540	for (i = 0; i < N_ROUND_MODES; i++) {
	541	if (!strcmp(round_names[i], name)) {
	542	return i;
	543	}
	544	}
	545	return -1;
	546	}
	547
8905770b MAL	548	static G_NORETURN
8905770b MAL	549	void die_host_rounding(enum rounding rounding)
25f539f3 EC	550	{
	551	fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
	552	round_names[rounding]);
	553	exit(EXIT_FAILURE);
	554	}
	555
	556	static void set_host_precision(enum rounding rounding)
	557	{
	558	int rhost;
	559
	560	switch (rounding) {
	561	case ROUND_EVEN:
	562	rhost = FE_TONEAREST;
	563	break;
	564	case ROUND_ZERO:
	565	rhost = FE_TOWARDZERO;
	566	break;
	567	case ROUND_DOWN:
	568	rhost = FE_DOWNWARD;
	569	break;
	570	case ROUND_UP:
	571	rhost = FE_UPWARD;
	572	break;
	573	case ROUND_TIEAWAY:
	574	die_host_rounding(rounding);
	575	return;
	576	default:
	577	g_assert_not_reached();
	578	}
	579
	580	if (fesetround(rhost)) {
	581	die_host_rounding(rounding);
	582	}
	583	}
	584
	585	static void set_soft_precision(enum rounding rounding)
	586	{
	587	signed char mode;
	588
	589	switch (rounding) {
	590	case ROUND_EVEN:
	591	mode = float_round_nearest_even;
	592	break;
	593	case ROUND_ZERO:
	594	mode = float_round_to_zero;
	595	break;
	596	case ROUND_DOWN:
	597	mode = float_round_down;
	598	break;
	599	case ROUND_UP:
	600	mode = float_round_up;
	601	break;
	602	case ROUND_TIEAWAY:
	603	mode = float_round_ties_away;
	604	break;
	605	default:
	606	g_assert_not_reached();
	607	}
	608	soft_status.float_rounding_mode = mode;
	609	}
	610
	611	static void parse_args(int argc, char *argv[])
	612	{
	613	int c;
614	int val;
615	int rounding = ROUND_EVEN;
616
617	for (;;) {
618	c = getopt(argc, argv, "d:ho:p:r:t:zZ");
619	if (c < 0) {
620	break;
621	}
622	switch (c) {
623	case 'd':
624	duration = atoi(optarg);
625	break;
626	case 'h':
627	usage_complete(argc, argv);
628	exit(EXIT_SUCCESS);
629	case 'o':
630	val = find_name(op_names, optarg);
631	if (val < 0) {
632	fprintf(stderr, "Unsupported op '%s'\n", optarg);
633	exit(EXIT_FAILURE);
634	}
635	operation = val;
636	break;
637	case 'p':
638	if (!strcmp(optarg, "single")) {
639	precision = PREC_SINGLE;
640	} else if (!strcmp(optarg, "double")) {
641	precision = PREC_DOUBLE;
f2b84b9e AB	642	} else if (!strcmp(optarg, "quad")) {
f2b84b9e AB	643	precision = PREC_QUAD;
25f539f3 EC	644	} else {
	645	fprintf(stderr, "Unsupported precision '%s'\n", optarg);
	646	exit(EXIT_FAILURE);
	647	}
	648	break;
	649	case 'r':
	650	rounding = round_name_to_mode(optarg);
	651	if (rounding < 0) {
	652	fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
	653	exit(EXIT_FAILURE);
	654	}
	655	break;
	656	case 't':
	657	val = find_name(tester_names, optarg);
	658	if (val < 0) {
	659	fprintf(stderr, "Unsupported tester '%s'\n", optarg);
	660	exit(EXIT_FAILURE);
	661	}
	662	tester = val;
	663	break;
	664	case 'z':
	665	soft_status.flush_inputs_to_zero = 1;
	666	break;
	667	case 'Z':
	668	soft_status.flush_to_zero = 1;
	669	break;
	670	}
	671	}
	672
	673	/* set precision and rounding mode based on the tester */
	674	switch (tester) {
	675	case TESTER_HOST:
	676	set_host_precision(rounding);
	677	break;
	678	case TESTER_SOFT:
	679	set_soft_precision(rounding);
	680	switch (precision) {
	681	case PREC_SINGLE:
	682	precision = PREC_FLOAT32;
	683	break;
	684	case PREC_DOUBLE:
	685	precision = PREC_FLOAT64;
	686	break;
f2b84b9e AB	687	case PREC_QUAD:
	688	precision = PREC_FLOAT128;
	689	break;
25f539f3 EC	690	default:
	691	g_assert_not_reached();
	692	}
	693	break;
	694	default:
	695	g_assert_not_reached();
	696	}
	697	}
	698
	699	static void pr_stats(void)
	700	{
	701	printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
	702	}
	703
	704	int main(int argc, char *argv[])
	705	{
	706	parse_args(argc, argv);
	707	run_bench();
	708	pr_stats();
	709	return 0;
	710	}