2 * Test Floating Point Conversion
5 /* we want additional float type definitions */
6 #define __STDC_WANT_IEC_60559_BFP_EXT__
7 #define __STDC_WANT_IEC_60559_TYPES_EXT__
/* Element count of an array: its total size divided by the size of its first element. Only meaningful for a real array object, not for a pointer. */
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Scratch buffer handed out by get_flag_state(); every call overwrites it. */
static char flag_str[256];

/*
 * Render a set of floating-point exception flags as text.
 *
 * flags: bitmask built from the <fenv.h> FE_* exception macros, e.g. the
 *        value returned by fetestexcept().
 *
 * Returns a pointer to the static flag_str buffer, so the text is only valid
 * until the next call and the function is not reentrant.
 *
 * With no flag set the text is "OK". Otherwise it is five slots in the fixed
 * order OVERFLOW, UNDERFLOW, DIV0, INEXACT, INVALID, separated by single
 * spaces; a flag that is not raised leaves its slot empty, so the separators
 * are always present.
 */
static char *get_flag_state(int flags)
{
    if (flags) {
        snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s",
                 flags & FE_OVERFLOW ? "OVERFLOW" : "",
                 flags & FE_UNDERFLOW ? "UNDERFLOW" : "",
                 flags & FE_DIVBYZERO ? "DIV0" : "",
                 flags & FE_INEXACT ? "INEXACT" : "",
                 flags & FE_INVALID ? "INVALID" : "");
    } else {
        snprintf(flag_str, sizeof(flag_str), "OK");
    }

    return flag_str;
}
/*
 * Print one double-precision value together with the current FP exception
 * state.
 *
 * i:   index of the test value, printed as a two-digit prefix.
 * num: the value; shown both in %e notation and as its raw 64-bit pattern.
 *
 * The exception flags are sampled here with fetestexcept(), so the line
 * reports whatever has been raised since the caller last cleared them.
 */
static void print_double_number(int i, double num)
{
    /*
     * Read the bit pattern through a union rather than a uint64_t pointer
     * cast: the cast breaks the effective-type (strict aliasing) rules.
     */
    union {
        double d;
        uint64_t h;
    } bits = { .d = num };
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n",
           i, num, bits.h, (unsigned int)flags, fstr);
}
/*
 * Print one single-precision value together with the current FP exception
 * state.
 *
 * i:   index of the test value, printed as a two-digit prefix.
 * num: the value; shown both in %e notation and as its raw 32-bit pattern.
 *
 * The exception flags are sampled here with fetestexcept(), so the line
 * reports whatever has been raised since the caller last cleared them.
 */
static void print_single_number(int i, float num)
{
    /*
     * Read the bit pattern through a union rather than a uint32_t pointer
     * cast: the cast breaks the effective-type (strict aliasing) rules.
     */
    union {
        float f;
        uint32_t w;
    } bits = { .f = num };
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    /* PRIx32 is the conversion that matches uint32_t on every target. */
    printf("%02d SINGLE: %02.20e / %#010" PRIx32 " (%#x => %s)\n",
           i, num, bits.w, (unsigned int)flags, fstr);
}
/*
 * Print one half-precision value together with the current FP exception
 * state.
 *
 * i:   index of the test value, printed as a two-digit prefix.
 * num: the raw 16-bit pattern; only the hex form is printed, as the value is
 *      carried in an integer rather than a floating-point type.
 *
 * The exception flags are sampled here with fetestexcept(), so the line
 * reports whatever has been raised since the caller last cleared them.
 */
static void print_half_number(int i, uint16_t num)
{
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    /* One argument per conversion: index, bit pattern, flag mask, flag text. */
    printf("%02d HALF: %#04x (%#x => %s)\n",
           i, (unsigned int)num, (unsigned int)flags, fstr);
}
/*
 * Print one 64-bit integer result together with the current FP exception
 * state.
 *
 * i:   index of the test value, printed as a two-digit prefix.
 * num: the value; shown both in signed decimal and as its 64-bit hex pattern.
 *
 * The exception flags are sampled here with fetestexcept(), so the line
 * reports whatever has been raised since the caller last cleared them.
 */
static void print_int64(int i, int64_t num)
{
    /* Signed-to-unsigned conversion keeps the bit pattern; no pointer cast needed. */
    uint64_t int64_as_hex = (uint64_t)num;
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n",
           i, num, int64_as_hex, (unsigned int)flags, fstr);
}
75 /* Signaling NaN macros, if supported. */
/* Each macro expands to a call of the matching __builtin_nans* compiler intrinsic with an empty payload string. */
/* NOTE(review): the f / no-suffix / l variants presumably yield float, double and long double, following the usual C suffix convention -- confirm against the compiler documentation. */
76 # define SNANF (__builtin_nansf (""))
77 # define SNAN (__builtin_nans (""))
78 # define SNANL (__builtin_nansl (""))
/* Single-precision inputs walked in index order by the convert_single_to_* routines. */
/* The per-entry comments mark values that sit on half-precision (FP16) boundaries. */
81 float single_numbers[] = { -SNANF,
93 5.96046E-8, /* min positive FP16 subnormal */
94 6.09756E-5, /* max subnormal FP16 */
95 6.10352E-5, /* min positive normal FP16 */
97 1.0009765625, /* smallest float after 1.0 FP16 */
101 65504.0, /* max FP16 */
/* NOTE(review): "AFP" presumably refers to the ARM alternative half-precision format enabled later in main -- confirm. */
104 131008.0, /* max AFP */
/*
 * Convert every entry of single_numbers to half precision and log the result.
 *
 * Per entry: clear all FP exception flags, print the input, run the
 * conversion instruction in inline asm, then print the 16-bit result.
 * The print helpers sample the exception flags, so the second line shows
 * what the conversion itself raised.
 */
112 static void convert_single_to_half(void)
116 printf("Converting single-precision to half-precision\n");
118 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
119 float input = single_numbers[i];
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
121 feclearexcept(FE_ALL_EXCEPT);
123 print_single_number(i, input);
/* Two encodings of the same conversion (vcvtb.f16.f32 and fcvt); NOTE(review): presumably one per target architecture, selected by preprocessor conditionals -- confirm. */
126 asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input));
129 asm("fcvt %h0, %s1" : "=w" (output) : "x" (input));
131 print_half_number(i, output);
/*
 * Convert every entry of single_numbers to double precision and log the
 * result.
 *
 * Per entry: clear all FP exception flags, print the input, run the
 * conversion instruction in inline asm, then print the double result along
 * with the exception flags raised since the clear.
 */
135 static void convert_single_to_double(void)
139 printf("Converting single-precision to double-precision\n");
141 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
142 float input = single_numbers[i];
143 /* uint64_t output; */
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
146 feclearexcept(FE_ALL_EXCEPT);
148 print_single_number(i, input);
/* Two encodings of the same conversion (vcvt.f64.f32 and fcvt); NOTE(review): presumably one per target architecture -- confirm. */
150 asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input));
152 asm("fcvt %d0, %s1" : "=w" (output) : "x" (input));
154 print_double_number(i, output);
/*
 * Convert every entry of single_numbers to an integer and log the result.
 *
 * Per entry: clear all FP exception flags, print the input, run fcvtzs in
 * inline asm with a general-purpose register as destination, then print the
 * result as a 64-bit integer along with the exception flags raised since
 * the clear.
 */
158 static void convert_single_to_integer(void)
162 printf("Converting single-precision to integer\n");
164 for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) {
165 float input = single_numbers[i];
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
168 feclearexcept(FE_ALL_EXCEPT);
170 print_single_number(i, input);
/* The vcvt.s32.f32 form below is disabled; only the fcvtzs form is live. */
172 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
175 asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input));
177 print_int64(i, output);
181 /* This allows us to initialise some doubles as pure hex */
/* Double-precision inputs walked in index order by the convert_double_to_* routines, which read each entry through its .d member. */
/* Entries are written either as a floating-point literal or, via the .h member, as a raw 64-bit pattern. */
187 test_doubles double_numbers[] = {
195 {-1.111E+30}, /* half prec */
202 {5.96046E-8}, /* min positive FP16 subnormal */
203 {6.09756E-5}, /* max subnormal FP16 */
204 {6.10352E-5}, /* min positive normal FP16 */
206 {1.0009765625}, /* smallest float after 1.0 FP16 */
208 {1.3789972848607228e-308},
209 {1.4914738736681624e-308},
213 {65504.0}, /* max FP16 */
216 {131008.0}, /* max AFP */
218 {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */
224 {.h = 0x7ff0000000000001}, /* SNAN */
/*
 * Convert every entry of double_numbers to half precision and log the
 * result.
 *
 * Per entry: clear all FP exception flags, print the input, run the
 * conversion instruction in inline asm, then print the 16-bit result along
 * with the exception flags raised since the clear.
 */
228 static void convert_double_to_half(void)
232 printf("Converting double-precision to half-precision\n");
234 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
235 double input = double_numbers[i].d;
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
238 feclearexcept(FE_ALL_EXCEPT);
240 print_double_number(i, input);
242 /* as we don't have _Float16 support */
/* The vcvtb.f16.f64 form below is disabled; only the fcvt form is live. */
244 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */
247 asm("fcvt %h0, %d1" : "=w" (output) : "x" (input));
249 print_half_number(i, output);
/*
 * Convert every entry of double_numbers to single precision and log the
 * result.
 *
 * Per entry: clear all FP exception flags, print the input, run the
 * conversion instruction in inline asm, then print the float result along
 * with the exception flags raised since the clear.
 */
253 static void convert_double_to_single(void)
257 printf("Converting double-precision to single-precision\n");
259 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
260 double input = double_numbers[i].d;
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
263 feclearexcept(FE_ALL_EXCEPT);
265 print_double_number(i, input);
/* Two encodings of the same conversion (vcvt.f32.f64 and fcvt); NOTE(review): presumably one per target architecture -- confirm. */
268 asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input));
270 asm("fcvt %s0, %d1" : "=w" (output) : "x" (input));
273 print_single_number(i, output);
/*
 * Convert every entry of double_numbers to an integer and log the result.
 *
 * Per entry: clear all FP exception flags, print the input, run fcvtzs in
 * inline asm with a general-purpose register as destination, then print the
 * result as a 64-bit integer along with the exception flags raised since
 * the clear.
 */
277 static void convert_double_to_integer(void)
281 printf("Converting double-precision to integer\n");
283 for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) {
284 double input = double_numbers[i].d;
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
287 feclearexcept(FE_ALL_EXCEPT);
289 print_double_number(i, input);
/* The disabled form below names f32 operands although this routine handles doubles; only the fcvtzs form is live. */
291 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
294 asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input));
296 print_int64(i, output);
300 /* no handy defines for these numbers */
/* Raw 16-bit half-precision patterns walked in index order by the convert_half_to_* routines. */
/* Where a comment gives two readings ("NaN / AHP"), NOTE(review): the second is presumably the meaning under the ARM alternative half-precision mode enabled in main -- confirm. */
301 uint16_t half_numbers[] = {
302 0xffff, /* -NaN / AHP -Max */
303 0xfcff, /* -NaN / AHP */
304 0xfc01, /* -NaN / AHP */
309 0x8001, /* -MIN subnormal */
312 0x0001, /* MIN subnormal */
316 0x7c01, /* NaN / AHP */
317 0x7cff, /* NaN / AHP */
318 0x7fff, /* NaN / AHP +Max*/
/*
 * Convert every entry of half_numbers to double precision and log the
 * result.
 *
 * Per entry: clear all FP exception flags, print the 16-bit input, run the
 * conversion instruction in inline asm, then print the double result along
 * with the exception flags raised since the clear.
 */
321 static void convert_half_to_double(void)
325 printf("Converting half-precision to double-precision\n");
327 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
328 uint16_t input = half_numbers[i];
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
331 feclearexcept(FE_ALL_EXCEPT);
333 print_half_number(i, input);
/* The vcvtb.f64.f16 form below is disabled; only the fcvt form is live. */
335 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */
338 asm("fcvt %d0, %h1" : "=w" (output) : "x" (input));
340 print_double_number(i, output);
/*
 * Convert every entry of half_numbers to single precision and log the
 * result.
 *
 * Per entry: clear all FP exception flags, print the 16-bit input, run the
 * conversion instruction in inline asm, then print the float result along
 * with the exception flags raised since the clear.
 */
344 static void convert_half_to_single(void)
348 printf("Converting half-precision to single-precision\n");
350 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
351 uint16_t input = half_numbers[i];
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
354 feclearexcept(FE_ALL_EXCEPT);
356 print_half_number(i, input);
/* The vcvtb form widens the input to uint32_t before passing it as an operand; the fcvt form passes the uint16_t directly. */
358 asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input));
360 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
362 print_single_number(i, output);
/*
 * Run the half-precision inputs through the "to integer" step and log the
 * result as a 64-bit integer.
 *
 * Per entry: clear all FP exception flags, print the 16-bit input, run the
 * inline asm, then print the output along with the exception flags raised
 * since the clear.
 */
366 static void convert_half_to_integer(void)
370 printf("Converting half-precision to integer\n");
372 for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) {
373 uint16_t input = half_numbers[i];
/* Start from a clean slate so any flag seen afterwards belongs to this entry. */
376 feclearexcept(FE_ALL_EXCEPT);
378 print_half_number(i, input);
380 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/
/* NOTE(review): unlike the other *_to_integer routines, which use fcvtzs into a general-purpose register, this issues fcvt with a single-precision destination, so the value printed as an integer looks like a float bit pattern -- confirm this is intended. */
383 asm("fcvt %s0, %h1" : "=w" (output) : "x" (input));
385 print_int64(i, output);
/* Rounding modes exercised by main: each entry pairs a <fenv.h> rounding constant (passed to fesetround) with the label printed in the "### Rounding" banner. */
394 float_mapping round_flags[] = {
395 { FE_TONEAREST, "to nearest" },
396 { FE_UPWARD, "upwards" },
397 { FE_DOWNWARD, "downwards" },
398 { FE_TOWARDZERO, "to zero" }
/*
 * Test driver.
 *
 * Runs every conversion routine once per rounding mode in round_flags,
 * twice over: first under the "IEEE Half Precision" banner, then again
 * after bit 26 has been OR-ed into the floating-point control register,
 * under the "ARM Alternative Half Precision" banner. All results go to
 * stdout through the conversion routines' print helpers.
 */
401 int main(int argc, char *argv[argc])
405 printf("#### Enabling IEEE Half Precision\n");
407 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
/* The rounding mode stays in force for every conversion until the next iteration changes it. */
408 fesetround(round_flags[i].flag);
409 printf("### Rounding %s\n", round_flags[i].desc);
410 convert_single_to_half();
411 convert_single_to_double();
412 convert_double_to_half();
413 convert_double_to_single();
414 convert_half_to_single();
415 convert_half_to_double();
418 /* convert to integer */
419 convert_single_to_integer();
420 convert_double_to_integer();
421 convert_half_to_integer();
423 /* And now with ARM alternative FP16 */
425 /* See glibc sysdeps/arm/fpu_control.h */
/* Read-modify-write through coprocessor p10: fetch the control value into r1, set bit 26, store it back. r1 is declared clobbered. */
426 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t"
427 "orr r1, r1, %[flags]\n\t"
428 "mcr p10, 7, r1, cr1, cr0, 0\n\t"
429 : /* no output */ : [flags] "n" (1 << 26) : "r1" );
/* Same bit set via fpcr with x1 as scratch. NOTE(review): confirm the modified value is written back to fpcr (an msr) after the orr. */
431 asm("mrs x1, fpcr\n\t"
432 "orr x1, x1, %[flags]\n\t"
434 : /* no output */ : [flags] "n" (1 << 26) : "x1" );
437 printf("#### Enabling ARM Alternative Half Precision\n");
/* Second pass: identical sequence to the first, now with the control bit set. */
439 for (i = 0; i < ARRAY_SIZE(round_flags); ++i) {
440 fesetround(round_flags[i].flag);
441 printf("### Rounding %s\n", round_flags[i].desc);
442 convert_single_to_half();
443 convert_single_to_double();
444 convert_double_to_half();
445 convert_double_to_single();
446 convert_half_to_single();
447 convert_half_to_double();
450 /* convert to integer */
451 convert_single_to_integer();
452 convert_double_to_integer();
453 convert_half_to_integer();