1
2 /* Copyright (C) 2006 Dave Nomura
3 dcnltc@us.ibm.com
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <limits.h>
26
27 typedef enum { FALSE=0, TRUE } bool_t;
28
29 typedef enum {
30 FADDS, FSUBS, FMULS, FDIVS,
31 FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32 FADD, FSUB, FMUL, FDIV, FMADD,
33 FMSUB, FNMADD, FNMSUB, FSQRT
34 } flt_op_t;
35
36 typedef enum {
37 TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38 char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39
40 const char *flt_op_names[] = {
41 "fadds", "fsubs", "fmuls", "fdivs",
42 "fmadds", "fmsubs", "fnmadds", "fnmsubs",
43 "fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44 "fnmsub", "fsqrt"
45 };
46
47 typedef unsigned int fpscr_t;
48
49 typedef union {
50 float flt;
51 struct {
52 #if defined(VGP_ppc64le_linux)
53 unsigned int frac:23;
54 unsigned int exp:8;
55 unsigned int sign:1;
56 #else
57 unsigned int sign:1;
58 unsigned int exp:8;
59 unsigned int frac:23;
60 #endif
61 } layout;
62 } flt_overlay;
63
64 typedef union {
65 double dbl;
66 struct {
67 #if defined(VGP_ppc64le_linux)
68 unsigned int frac_lo:32;
69 unsigned int frac_hi:20;
70 unsigned int exp:11;
71 unsigned int sign:1;
72 #else
73 unsigned int sign:1;
74 unsigned int exp:11;
75 unsigned int frac_hi:20;
76 unsigned int frac_lo:32;
77 #endif
78 } layout;
79 struct {
80 unsigned int hi;
81 unsigned int lo;
82 } dbl_pair;
83 } dbl_overlay;
84
85 void assert_fail(const char *msg,
86 const char* expr, const char* file, int line, const char*fn);
87
88 #define STRING(__str) #__str
89 #define assert(msg, expr) \
90 ((void) ((expr) ? 0 : \
91 (assert_fail (msg, STRING(expr), \
92 __FILE__, __LINE__, \
93 __PRETTY_FUNCTION__), 0)))
94 float denorm_small;
95 double dbl_denorm_small;
96 float norm_small;
97 bool_t debug = FALSE;
98 bool_t long_is_64_bits = sizeof(long) == 8;
99
assert_fail(msg,expr,file,line,fn)100 void assert_fail (msg, expr, file, line, fn)
101 const char* msg;
102 const char* expr;
103 const char* file;
104 int line;
105 const char*fn;
106 {
107 printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
108 msg, file, line, fn, expr );
109 exit( 1 );
110 }
set_rounding_mode(round_mode_t mode)111 void set_rounding_mode(round_mode_t mode)
112 {
113 switch(mode) {
114 case TO_NEAREST:
115 asm volatile("mtfsfi 7, 0");
116 break;
117 case TO_ZERO:
118 asm volatile("mtfsfi 7, 1");
119 break;
120 case TO_PLUS_INFINITY:
121 asm volatile("mtfsfi 7, 2");
122 break;
123 case TO_MINUS_INFINITY:
124 asm volatile("mtfsfi 7, 3");
125 break;
126 }
127 }
128
print_double(char * msg,double dbl)129 void print_double(char *msg, double dbl)
130 {
131 dbl_overlay D;
132 D.dbl = dbl;
133
134 printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
135 msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
136 D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
137 }
138
print_single(char * msg,float * flt)139 void print_single(char *msg, float *flt)
140 {
141 flt_overlay F;
142 F.flt = *flt;
143
144 /* NOTE: for the purposes of comparing the fraction of a single with
145 ** a double left shift the .frac so that hex digits are grouped
146 ** from left to right. this is necessary because the size of a
147 ** single mantissa (23) bits is not a multiple of 4
148 */
149 printf("%15s : flt %-20a = %c(%4d, %06x)\n",
150 msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
151 }
152
check_dbl_to_flt_round(round_mode_t mode,double dbl,float * expected)153 int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
154 {
155 int status = 0;
156 flt_overlay R, E;
157 char *result;
158
159 set_rounding_mode(mode);
160
161 E.flt = *expected;
162 R.flt = (float)dbl;
163
164 if ((R.layout.sign != E.layout.sign) ||
165 (R.layout.exp != E.layout.exp) ||
166 (R.layout.frac != E.layout.frac)) {
167 result = "FAILED";
168 status = 1;
169 } else {
170 result = "PASSED";
171 status = 0;
172 }
173 printf("%s:%s:(double)(%-20a) = %20a",
174 round_mode_name[mode], result, R.flt, dbl);
175 if (status) {
176 print_single("\n\texpected", &E.flt);
177 print_single("\n\trounded ", &R.flt);
178 }
179 putchar('\n');
180 return status;
181 }
182
test_dbl_to_float_convert(char * msg,float * base)183 int test_dbl_to_float_convert(char *msg, float *base)
184 {
185 int status = 0;
186 double half = (double)denorm_small/2;
187 double qtr = half/2;
188 double D_hi = (double)*base + half + qtr;
189 double D_lo = (double)*base + half - qtr;
190 float F_lo = *base;
191 float F_hi = F_lo + denorm_small;
192
193
194 /*
195 ** .....+-----+-----+-----+-----+---....
196 ** ^F_lo ^ ^ ^
197 ** D_lo
198 ** D_hi
199 ** F_hi
200 ** F_lo and F_hi are two consecutive single float model numbers
201 ** denorm_small distance apart. D_lo and D_hi are two numbers
202 ** within that range that are not representable as single floats
203 ** and will be rounded to either F_lo or F_hi.
204 */
205 printf("-------------------------- %s --------------------------\n", msg);
206 if (debug) {
207 print_double("D_lo", D_lo);
208 print_double("D_hi", D_hi);
209 print_single("F_lo", &F_lo);
210 print_single("F_hi", &F_hi);
211 }
212
213 /* round to nearest */
214 status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
215 status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
216
217 /* round to zero */
218 status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
219 status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
220
221 /* round to +inf */
222 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
223 status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
224
225 /* round to -inf */
226 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
227 status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
228 return status;
229 }
230
231 void
init()232 init()
233 {
234 flt_overlay F;
235 dbl_overlay D;
236
237 /* small is the smallest denormalized single float number */
238 F.layout.sign = 0;
239 F.layout.exp = 0;
240 F.layout.frac = 1;
241 denorm_small = F.flt; /* == 2^(-149) */
242 if (debug) {
243 print_single("float small", &F.flt);
244 }
245
246 D.layout.sign = 0;
247 D.layout.exp = 0;
248 D.layout.frac_hi = 0;
249 D.layout.frac_lo = 1;
250 dbl_denorm_small = D.dbl; /* == 2^(-1022) */
251 if (debug) {
252 print_double("double small", D.dbl);
253 }
254
255 /* n_small is the smallest normalized single precision float */
256 F.layout.exp = 1;
257 norm_small = F.flt;
258 }
259
check_int_to_flt_round(round_mode_t mode,long L,float * expected)260 int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
261 {
262 int status = 0;
263 int I = L;
264 char *int_name = "int";
265 flt_overlay R, E;
266 char *result;
267 int iter;
268
269 set_rounding_mode(mode);
270 E.flt = *expected;
271
272 for (iter = 0; iter < 2; iter++) {
273 int stat = 0;
274 R.flt = (iter == 0 ? (float)I : (float)L);
275
276 if ((R.layout.sign != E.layout.sign) ||
277 (R.layout.exp != E.layout.exp) ||
278 (R.layout.frac != E.layout.frac)) {
279 result = "FAILED";
280 stat = 1;
281 } else {
282 result = "PASSED";
283 stat = 0;
284 }
285 printf("%s:%s:(float)(%4s)%9d = %11.1f",
286 round_mode_name[mode], result, int_name, I, R.flt);
287 if (stat) {
288 print_single("\n\texpected: %.1f ", &E.flt);
289 print_single("\n\trounded ", &R.flt);
290 }
291 putchar('\n');
292 status |= stat;
293
294 if (!long_is_64_bits) break;
295 int_name = "long";
296 }
297 return status;
298 }
299
check_long_to_dbl_round(round_mode_t mode,long L,double * expected)300 int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
301 {
302 int status = 0;
303 dbl_overlay R, E;
304 char *result;
305
306 set_rounding_mode(mode);
307 E.dbl = *expected;
308
309 R.dbl = (double)L;
310
311 if ((R.layout.sign != E.layout.sign) ||
312 (R.layout.exp != E.layout.exp) ||
313 (R.layout.frac_lo != E.layout.frac_lo) ||
314 (R.layout.frac_hi != E.layout.frac_hi)) {
315 result = "FAILED";
316 status = 1;
317 } else {
318 result = "PASSED";
319 status = 0;
320 }
321 printf("%s:%s:(double)(%18ld) = %20.1f",
322 round_mode_name[mode], result, L, R.dbl);
323 if (status) {
324 printf("\n\texpected %.1f : ", E.dbl);
325 }
326 putchar('\n');
327 return status;
328 }
329
test_int_to_float_convert(char * msg)330 int test_int_to_float_convert(char *msg)
331 {
332 int status = 0;
333 int int24_hi = 0x03ff0fff;
334 int int24_lo = 0x03ff0ffd;
335 float pos_flt_lo = 67047420.0;
336 float pos_flt_hi = 67047424.0;
337 float neg_flt_lo = -67047420.0;
338 float neg_flt_hi = -67047424.0;
339
340 printf("-------------------------- %s --------------------------\n", msg);
341 status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
342 status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
343 status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
344 status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
345 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
346 status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
347 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
348 status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
349
350 status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
351 status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
352 status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
353 status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
354 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
355 status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
356 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
357 status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
358 return status;
359 }
360
361 #ifdef __powerpc64__
test_long_to_double_convert(char * msg)362 int test_long_to_double_convert(char *msg)
363 {
364 int status = 0;
365 long long55_hi = 0x07ff0ffffffffff;
366 long long55_lo = 0x07ff0fffffffffd;
367 double pos_dbl_lo = 36012304344547324.0;
368 double pos_dbl_hi = 36012304344547328.0;
369 double neg_dbl_lo = -36012304344547324.0;
370 double neg_dbl_hi = -36012304344547328.0;
371
372 printf("-------------------------- %s --------------------------\n", msg);
373 status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
374 status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
375 status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
376 status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
377 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
378 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
379 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
380 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
381
382 status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
383 status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
384 status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
385 status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
386 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
387 status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
388 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
389 status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
390 return status;
391 }
392 #endif
393
check_single_arithmetic_op(flt_op_t op)394 int check_single_arithmetic_op(flt_op_t op)
395 {
396 char *result;
397 int status = 0;
398 dbl_overlay R, E;
399 double qtr, half, fA, fB, fD;
400 round_mode_t mode;
401 int q, s;
402 bool_t two_args = TRUE;
403 float whole = denorm_small;
404
405 #define BINOP(op) \
406 __asm__ volatile( \
407 op" %0, %1, %2\n\t" \
408 : "=f"(fD) : "f"(fA) , "f"(fB));
409 #define UNOP(op) \
410 __asm__ volatile( \
411 op" %0, %1\n\t" \
412 : "=f"(fD) : "f"(fA));
413
414 half = (double)whole/2;
415 qtr = half/2;
416
417 if (debug) {
418 print_double("qtr", qtr);
419 print_double("whole", whole);
420 print_double("2*whole", 2*whole);
421 }
422
423 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
424 for (s = -1; s < 2; s += 2)
425 for (q = 1; q < 4; q += 2) {
426 double expected;
427 double lo = s*whole;
428 double hi = s*2*whole;
429
430 switch(op) {
431 case FADDS:
432 fA = s*whole;
433 fB = s*q*qtr;
434 break;
435 case FSUBS:
436 fA = s*2*whole;
437 fB = s*(q == 1 ? 3 : 1)*qtr;
438 break;
439 case FMULS:
440 fA = 0.5;
441 fB = s*(4+q)*half;
442 break;
443 case FDIVS:
444 fA = s*(4+q)*half;
445 fB = 2.0;
446 break;
447 default:
448 assert("check_single_arithmetic_op: unexpected op",
449 FALSE);
450 break;
451 }
452
453 switch(mode) {
454 case TO_NEAREST:
455 expected = (q == 1 ? lo : hi);
456 break;
457 case TO_ZERO:
458 expected = lo;
459 break;
460 case TO_PLUS_INFINITY:
461 expected = (s == 1 ? hi : lo);
462 break;
463 case TO_MINUS_INFINITY:
464 expected = (s == 1 ? lo : hi);
465 break;
466 }
467
468 set_rounding_mode(mode);
469
470 /*
471 ** do the double precision dual operation just for comparison
472 ** when debugging
473 */
474 switch(op) {
475 case FADDS:
476 BINOP("fadds");
477 R.dbl = fD;
478 BINOP("fadd");
479 break;
480 case FSUBS:
481 BINOP("fsubs");
482 R.dbl = fD;
483 BINOP("fsub");
484 break;
485 case FMULS:
486 BINOP("fmuls");
487 R.dbl = fD;
488 BINOP("fmul");
489 break;
490 case FDIVS:
491 BINOP("fdivs");
492 R.dbl = fD;
493 BINOP("fdiv");
494 break;
495 default:
496 assert("check_single_arithmetic_op: unexpected op",
497 FALSE);
498 break;
499 }
500 #undef UNOP
501 #undef BINOP
502
503 E.dbl = expected;
504
505 if ((R.layout.sign != E.layout.sign) ||
506 (R.layout.exp != E.layout.exp) ||
507 (R.layout.frac_lo != E.layout.frac_lo) ||
508 (R.layout.frac_hi != E.layout.frac_hi)) {
509 result = "FAILED";
510 status = 1;
511 } else {
512 result = "PASSED";
513 status = 0;
514 }
515
516 printf("%s:%s:%s(%-13a",
517 round_mode_name[mode], result, flt_op_names[op], fA);
518 if (two_args) printf(", %-13a", fB);
519 printf(") = %-13a", R.dbl);
520 if (status) printf("\n\texpected %a", E.dbl);
521 putchar('\n');
522
523 if (debug) {
524 print_double("hi", hi);
525 print_double("lo", lo);
526 print_double("expected", expected);
527 print_double("got", R.dbl);
528 print_double("double result", fD);
529 }
530 }
531
532 return status;
533 }
534
check_single_guarded_arithmetic_op(flt_op_t op)535 int check_single_guarded_arithmetic_op(flt_op_t op)
536 {
537 typedef struct {
538 int num, den, frac;
539 } fdivs_t;
540
541 char *result;
542 int status = 0;
543 flt_overlay A, B, Z;
544 dbl_overlay Res, Exp;
545 double fA, fB, fC, fD;
546 round_mode_t mode;
547 int g, s;
548 int arg_count;
549
550 fdivs_t divs_guard_cases[16] = {
551 { 105, 56, 0x700000 }, /* : 0 */
552 { 100, 57, 0x608FB8 }, /* : 1 */
553 { 000, 00, 0x000000 }, /* : X */
554 { 100, 52, 0x762762 }, /* : 3 */
555 { 000, 00, 0x000000 }, /* : X */
556 { 100, 55, 0x68BA2E }, /* : 5 */
557 { 000, 00, 0x000000 }, /* : X */
558 { 100, 51, 0x7AFAFA }, /* : 7 */
559 { 000, 00, 0x000000 }, /* : X */
560 { 100, 56, 0x649249 }, /* : 9 */
561 { 000, 00, 0x000000 }, /* : X */
562 { 100, 54, 0x6D097B }, /* : B */
563 { 000, 00, 0x000000 }, /* : X */
564 { 100, 59, 0x58F2FB }, /* : D */
565 { 000, 00, 0x000000 }, /* : X */
566 { 101, 52, 0x789D89 } /* : F */
567 };
568
569 /* 0x1.00000 00000000p-3 */
570 /* set up the invariant fields of B, the arg to cause rounding */
571 B.flt = 0.0;
572 B.layout.exp = 124; /* -3 */
573
574 /* set up args so result is always Z = 1.200000000000<g>p+0 */
575 Z.flt = 1.0;
576 Z.layout.sign = 0;
577
578 #define TERNOP(op) \
579 arg_count = 3; \
580 __asm__ volatile( \
581 op" %0, %1, %2, %3\n\t" \
582 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
583 #define BINOP(op) \
584 arg_count = 2; \
585 __asm__ volatile( \
586 op" %0, %1, %2\n\t" \
587 : "=f"(fD) : "f"(fA) , "f"(fB));
588 #define UNOP(op) \
589 arg_count = 1; \
590 __asm__ volatile( \
591 op" %0, %1\n\t" \
592 : "=f"(fD) : "f"(fA));
593
594 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
595 for (s = -1; s < 2; s += 2)
596 for (g = 0; g < 16; g += 1) {
597 double lo, hi, expected;
598 int LSB;
599 int guard = 0;
600 int z_sign = s;
601
602 /*
603 ** one argument will have exponent = 0 as will the result (by
604 ** design) so choose the other argument with exponent -3 to
605 ** force a 3 bit shift for scaling leaving us with 3 guard bits
606 ** and the LSB bit at the bottom of the manitssa.
607 */
608 switch(op) {
609 case FADDS:
610 /* 1p+0 + 1.00000<g>p-3 */
611 B.layout.frac = g;
612
613 fB = s*B.flt;
614 fA = s*1.0;
615
616 /* set up Z to be truncated result */
617
618 /* mask off LSB from resulting guard bits */
619 guard = g & 7;
620
621 Z.layout.frac = 0x100000 | (g >> 3);
622 break;
623 case FSUBS:
624 /* 1.200002p+0 - 1.000000000000<g>p-3 */
625 A.flt = 1.125;
626 /* add enough to avoid scaling of the result */
627 A.layout.frac |= 0x2;
628 fA = s*A.flt;
629
630 B.layout.frac = g;
631 fB = s*B.flt;
632
633 /* set up Z to be truncated result */
634 guard = (0x10-g);
635 Z.layout.frac = guard>>3;
636
637 /* mask off LSB from resulting guard bits */
638 guard &= 7;
639 break;
640 case FMULS:
641 /* 1 + g*2^-23 */
642 A.flt = 1.0;
643 A.layout.frac = g;
644 fA = s*A.flt;
645 fB = 1.125;
646
647 /* set up Z to be truncated result */
648 Z.flt = 1.0;
649 Z.layout.frac = 0x100000;
650 Z.layout.frac |= g + (g>>3);
651 guard = g & 7;
652 break;
653 case FDIVS:
654 /* g >> 3 == LSB, g & 7 == guard bits */
655 guard = g & 7;
656 if ((guard & 1) == 0) {
657 /* special case: guard bit X = 0 */
658 A.flt = denorm_small;
659 A.layout.frac = g;
660 fA = A.flt;
661 fB = s*8.0;
662 Z.flt = 0.0;
663 Z.layout.frac |= (g >> 3);
664 } else {
665 fA = s*divs_guard_cases[g].num;
666 fB = divs_guard_cases[g].den;
667
668 Z.flt = 1.0;
669 Z.layout.frac = divs_guard_cases[g].frac;
670 }
671 break;
672 case FMADDS:
673 case FMSUBS:
674 case FNMADDS:
675 case FNMSUBS:
676 /* 1 + g*2^-23 */
677 A.flt = 1.0;
678 A.layout.frac = g;
679 fA = s*A.flt;
680 fB = 1.125;
681
682 /* 1.000001p-1 */
683 A.flt = 0.5;
684 A.layout.frac = 1;
685 fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
686
687 /* set up Z to be truncated result */
688 z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
689 guard = ((g & 7) + 0x4) & 7;
690 Z.flt = 1.0;
691 Z.layout.frac = 0x500000;
692 Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
693 break;
694 default:
695 assert("check_single_arithmetic_op: unexpected op",
696 FALSE);
697 break;
698 }
699
700 /* get LSB for tie breaking */
701 LSB = Z.layout.frac & 1;
702
703 /* set up hi and lo */
704 lo = z_sign*Z.flt;
705 Z.layout.frac += 1;
706 hi = z_sign*Z.flt;
707
708 switch(mode) {
709 case TO_NEAREST:
710 /* look at 3 guard bits to determine expected rounding */
711 switch(guard) {
712 case 0:
713 case 1: case 2: case 3:
714 expected = lo;
715 break;
716 case 4: /* tie: round to even */
717 if (debug) printf("tie: LSB = %d\n", LSB);
718 expected = (LSB == 0 ? lo : hi);
719 break;
720 case 5: case 6: case 7:
721 expected = hi;
722 break;
723 default:
724 assert("check_single_guarded_arithmetic_op: unexpected guard",
725 FALSE);
726 }
727 break;
728 case TO_ZERO:
729 expected = lo;
730 break;
731 case TO_PLUS_INFINITY:
732 if (guard == 0) {
733 /* no rounding */
734 expected = lo;
735 } else {
736 expected = (s == 1 ? hi : lo);
737 }
738 break;
739 case TO_MINUS_INFINITY:
740 if (guard == 0) {
741 /* no rounding */
742 expected = lo;
743 } else {
744 expected = (s == 1 ? lo : hi);
745 }
746 break;
747 }
748
749 set_rounding_mode(mode);
750
751 /*
752 ** do the double precision dual operation just for comparison
753 ** when debugging
754 */
755 switch(op) {
756 case FADDS:
757 BINOP("fadds");
758 Res.dbl = fD;
759 break;
760 case FSUBS:
761 BINOP("fsubs");
762 Res.dbl = fD;
763 break;
764 case FMULS:
765 BINOP("fmuls");
766 Res.dbl = fD;
767 break;
768 case FDIVS:
769 BINOP("fdivs");
770 Res.dbl = fD;
771 break;
772 case FMADDS:
773 TERNOP("fmadds");
774 Res.dbl = fD;
775 break;
776 case FMSUBS:
777 TERNOP("fmsubs");
778 Res.dbl = fD;
779 break;
780 case FNMADDS:
781 TERNOP("fnmadds");
782 Res.dbl = fD;
783 break;
784 case FNMSUBS:
785 TERNOP("fnmsubs");
786 Res.dbl = fD;
787 break;
788 default:
789 assert("check_single_guarded_arithmetic_op: unexpected op",
790 FALSE);
791 break;
792 }
793 #undef UNOP
794 #undef BINOP
795 #undef TERNOP
796
797 Exp.dbl = expected;
798
799 if ((Res.layout.sign != Exp.layout.sign) ||
800 (Res.layout.exp != Exp.layout.exp) ||
801 (Res.layout.frac_lo != Exp.layout.frac_lo) ||
802 (Res.layout.frac_hi != Exp.layout.frac_hi)) {
803 result = "FAILED";
804 status = 1;
805 } else {
806 result = "PASSED";
807 status = 0;
808 }
809
810 /* There seems to be some noise in the lower bits. The value
811 * on the least significant digit seems to vary when printing
812 * based on the rounding mode of the compiler. Just trying
813 * to get rid of the noise in the least significant bits when
814 * printing the operand.
815 */
816
817 fA = ((long int)(fA*10000))/10000.0;
818 /* Change -0.0 to a positive 0.0. Some compilers print -0.0
819 * others do not. Make it consistent.
820 */
821 if (fA == -0.0)
822 fA = 0.0;
823
824 printf("%s:%s:%s(%-13.6f",
825 round_mode_name[mode], result, flt_op_names[op], fA);
826 if (arg_count > 1) printf(", %-13a", fB);
827 if (arg_count > 2) printf(", %-13a", fC);
828 printf(") = %-13a", Res.dbl);
829 if (status) printf("\n\texpected %a", Exp.dbl);
830 putchar('\n');
831
832 if (debug) {
833 print_double("hi", hi);
834 print_double("lo", lo);
835 print_double("expected", expected);
836 print_double("got", Res.dbl);
837 }
838 }
839
840 return status;
841 }
842
check_double_guarded_arithmetic_op(flt_op_t op)843 int check_double_guarded_arithmetic_op(flt_op_t op)
844 {
845 typedef struct {
846 int num, den, hi, lo;
847 } fdiv_t;
848 typedef struct {
849 double arg;
850 int exp, hi, lo;
851 } fsqrt_t;
852
853 char *result;
854 int status = 0;
855 dbl_overlay A, B, Z;
856 dbl_overlay Res, Exp;
857 double fA, fB, fC, fD;
858 round_mode_t mode;
859 int g, s;
860 int arg_count;
861 fdiv_t div_guard_cases[16] = {
862 { 62, 62, 0x00000, 0x00000000 }, /* 0 */
863 { 64, 62, 0x08421, 0x08421084 }, /* 1 */
864 { 66, 62, 0x10842, 0x10842108 }, /* 2 */
865 { 100, 62, 0x9ce73, 0x9ce739ce }, /* 3 */
866 { 100, 62, 0x9ce73, 0x9ce739ce }, /* X */
867 { 102, 62, 0xa5294, 0xa5294a52 }, /* 5 */
868 { 106, 62, 0xb5ad6, 0xb5ad6b5a }, /* 6 */
869 { 108, 62, 0xbdef7, 0xbdef7bde }, /* 7 */
870 { 108, 108, 0x00000, 0x00000000 }, /* 8 */
871 { 112, 62, 0xce739, 0xce739ce7 }, /* 9 */
872 { 114, 62, 0xd6b5a, 0xd6b5ad6b }, /* A */
873 { 116, 62, 0xdef7b, 0xdef7bdef }, /* B */
874 { 84, 62, 0x5ad6b, 0x5ad6b5ad }, /* X */
875 { 118, 62, 0xe739c, 0xe739ce73 }, /* D */
876 { 90, 62, 0x739ce, 0x739ce739 }, /* E */
877 { 92, 62, 0x7bdef, 0x7bdef7bd } /* F */
878 };
879
880
881 fsqrt_t sqrt_guard_cases[16] = {
882 { 0x1.08800p0, 0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440 */
883 { 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910 */
884 { 0x1.A8220p0, 0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411 */
885 { 0x1.05A20p0, 0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1 */
886 { 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541 */
887 { 0x1.DCA20p0, 0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51 */
888 { 0x1.02C80p0, 0, 0x01630, 0x9cde7483}, /* :6 B6.8164 */
889 { 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40 */
890 { 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9 */
891 { 0x1.1D020p0, 0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81 */
892 { 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4 */
893 { 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400 */
894 { 0x1.48520p0, 0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429 */
895 { 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61 */
896 { 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684 */
897 { 0x1.76B20p0, 0, 0x35b67, 0x81aed827} /* :F DB.BB59 */
898 };
899
900 /* 0x1.00000 00000000p-3 */
901 /* set up the invariant fields of B, the arg to cause rounding */
902 B.dbl = 0.0;
903 B.layout.exp = 1020;
904
905 /* set up args so result is always Z = 1.200000000000<g>p+0 */
906 Z.dbl = 1.0;
907 Z.layout.sign = 0;
908
909 #define TERNOP(op) \
910 arg_count = 3; \
911 __asm__ volatile( \
912 op" %0, %1, %2, %3\n\t" \
913 : "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
914 #define BINOP(op) \
915 arg_count = 2; \
916 __asm__ volatile( \
917 op" %0, %1, %2\n\t" \
918 : "=f"(fD) : "f"(fA) , "f"(fB));
919 #define UNOP(op) \
920 arg_count = 1; \
921 __asm__ volatile( \
922 op" %0, %1\n\t" \
923 : "=f"(fD) : "f"(fA));
924
925 for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
926 for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
927 for (g = 0; g < 16; g += 1) {
928 double lo, hi, expected;
929 int LSB;
930 int guard;
931 int z_sign = s;
932
933 /*
934 ** one argument will have exponent = 0 as will the result (by
935 ** design) so choose the other argument with exponent -3 to
936 ** force a 3 bit shift for scaling leaving us with 3 guard bits
937 ** and the LSB bit at the bottom of the manitssa.
938 */
939 switch(op) {
940 case FADD:
941 /* 1p+0 + 1.000000000000<g>p-3 */
942 B.layout.frac_lo = g;
943
944 fB = s*B.dbl;
945 fA = s*1.0;
946
947 /* set up Z to be truncated result */
948
949 /* mask off LSB from resulting guard bits */
950 guard = g & 7;
951
952 Z.layout.frac_hi = 0x20000;
953 Z.layout.frac_lo = g >> 3;
954
955 break;
956 case FSUB:
957 /* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
958 A.dbl = 1.125;
959 /* add enough to avoid scaling of the result */
960 A.layout.frac_lo = 0x2;
961 fA = s*A.dbl;
962
963 B.layout.frac_lo = g;
964 fB = s*B.dbl;
965
966 /* set up Z to be truncated result */
967 guard = (0x10-g);
968 Z.layout.frac_hi = 0x0;
969 Z.layout.frac_lo = guard>>3;
970
971 /* mask off LSB from resulting guard bits */
972 guard &= 7;
973 break;
974 case FMUL:
975 /* 1 + g*2^-52 */
976 A.dbl = 1.0;
977 A.layout.frac_lo = g;
978 fA = s*A.dbl;
979 fB = 1.125;
980
981 /* set up Z to be truncated result */
982 Z.dbl = 1.0;
983 Z.layout.frac_hi = 0x20000;
984 Z.layout.frac_lo = g + (g>>3);
985 guard = g & 7;
986 break;
987 case FMADD:
988 case FMSUB:
989 case FNMADD:
990 case FNMSUB:
991 /* 1 + g*2^-52 */
992 A.dbl = 1.0;
993 A.layout.frac_lo = g;
994 fA = s*A.dbl;
995 fB = 1.125;
996
997 /* 1.0000000000001p-1 */
998 A.dbl = 0.5;
999 A.layout.frac_lo = 1;
1000 fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
1001
1002 /* set up Z to be truncated result */
1003 z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
1004 guard = ((g & 7) + 0x4) & 7;
1005 Z.dbl = 1.0;
1006 Z.layout.frac_hi = 0xa0000;
1007 Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
1008 break;
1009 case FDIV:
1010 /* g >> 3 == LSB, g & 7 == guard bits */
1011 guard = g & 7;
1012 if (guard == 0x4) {
1013 /* special case guard bits == 4, inexact tie */
1014 fB = s*2.0;
1015 Z.dbl = 0.0;
1016 if (g >> 3) {
1017 fA = dbl_denorm_small + 2*dbl_denorm_small;
1018 Z.layout.frac_lo = 0x1;
1019 } else {
1020 fA = dbl_denorm_small;
1021 }
1022 } else {
1023 fA = s*div_guard_cases[g].num;
1024 fB = div_guard_cases[g].den;
1025
1026 printf("%d/%d\n",
1027 s*div_guard_cases[g].num,
1028 div_guard_cases[g].den);
1029 Z.dbl = 1.0;
1030 Z.layout.frac_hi = div_guard_cases[g].hi;
1031 Z.layout.frac_lo = div_guard_cases[g].lo;
1032 }
1033 break;
1034 case FSQRT:
1035 fA = s*sqrt_guard_cases[g].arg;
1036 Z.dbl = 1.0;
1037 Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1038 Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1039 Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1040 guard = g >> 1;
1041 if (g & 1) guard |= 1;
1042 /* don't have test cases for when X bit = 0 */
1043 if (guard == 0 || guard == 4) continue;
1044 break;
1045 default:
1046 assert("check_double_guarded_arithmetic_op: unexpected op",
1047 FALSE);
1048 break;
1049 }
1050
1051 /* get LSB for tie breaking */
1052 LSB = Z.layout.frac_lo & 1;
1053
1054 /* set up hi and lo */
1055 lo = z_sign*Z.dbl;
1056 Z.layout.frac_lo += 1;
1057 hi = z_sign*Z.dbl;
1058
1059 switch(mode) {
1060 case TO_NEAREST:
1061 /* look at 3 guard bits to determine expected rounding */
1062 switch(guard) {
1063 case 0:
1064 case 1: case 2: case 3:
1065 expected = lo;
1066 break;
1067 case 4: /* tie: round to even */
1068 if (debug) printf("tie: LSB = %d\n", LSB);
1069 expected = (LSB == 0 ? lo : hi);
1070 break;
1071 case 5: case 6: case 7:
1072 expected = hi;
1073 break;
1074 default:
1075 assert("check_double_guarded_arithmetic_op: unexpected guard",
1076 FALSE);
1077 }
1078 break;
1079 case TO_ZERO:
1080 expected = lo;
1081 break;
1082 case TO_PLUS_INFINITY:
1083 if (guard == 0) {
1084 /* no rounding */
1085 expected = lo;
1086 } else {
1087 expected = (s == 1 ? hi : lo);
1088 }
1089 break;
1090 case TO_MINUS_INFINITY:
1091 if (guard == 0) {
1092 /* no rounding */
1093 expected = lo;
1094 } else {
1095 expected = (s == 1 ? lo : hi);
1096 }
1097 break;
1098 }
1099
1100 set_rounding_mode(mode);
1101
1102 /*
1103 ** do the double precision dual operation just for comparison
1104 ** when debugging
1105 */
1106 switch(op) {
1107 case FADD:
1108 BINOP("fadd");
1109 Res.dbl = fD;
1110 break;
1111 case FSUB:
1112 BINOP("fsub");
1113 Res.dbl = fD;
1114 break;
1115 case FMUL:
1116 BINOP("fmul");
1117 Res.dbl = fD;
1118 break;
1119 case FMADD:
1120 TERNOP("fmadd");
1121 Res.dbl = fD;
1122 break;
1123 case FMSUB:
1124 TERNOP("fmsub");
1125 Res.dbl = fD;
1126 break;
1127 case FNMADD:
1128 TERNOP("fnmadd");
1129 Res.dbl = fD;
1130 break;
1131 case FNMSUB:
1132 TERNOP("fnmsub");
1133 Res.dbl = fD;
1134 break;
1135 case FDIV:
1136 BINOP("fdiv");
1137 Res.dbl = fD;
1138 break;
1139 case FSQRT:
1140 UNOP("fsqrt");
1141 Res.dbl = fD;
1142 break;
1143 default:
1144 assert("check_double_guarded_arithmetic_op: unexpected op",
1145 FALSE);
1146 break;
1147 }
1148 #undef UNOP
1149 #undef BINOP
1150 #undef TERNOP
1151
1152 Exp.dbl = expected;
1153
1154 if ((Res.layout.sign != Exp.layout.sign) ||
1155 (Res.layout.exp != Exp.layout.exp) ||
1156 (Res.layout.frac_lo != Exp.layout.frac_lo) ||
1157 (Res.layout.frac_hi != Exp.layout.frac_hi)) {
1158 result = "FAILED";
1159 status = 1;
1160 } else {
1161 result = "PASSED";
1162 status = 0;
1163 }
1164
1165 printf("%s:%s:%s(%-13a",
1166 round_mode_name[mode], result, flt_op_names[op], fA);
1167 if (arg_count > 1) printf(", %-13a", fB);
1168 if (arg_count > 2) printf(", %-13a", fC);
1169 printf(") = %-13a", Res.dbl);
1170 if (status) printf("\n\texpected %a", Exp.dbl);
1171 putchar('\n');
1172
1173 if (debug) {
1174 print_double("hi", hi);
1175 print_double("lo", lo);
1176 print_double("expected", expected);
1177 print_double("got", Res.dbl);
1178 }
1179 }
1180
1181 return status;
1182 }
1183
test_float_arithmetic_ops()1184 int test_float_arithmetic_ops()
1185 {
1186 int status = 0;
1187 flt_op_t op;
1188
1189 /*
1190 ** choose FP operands whose result should be rounded to either
1191 ** lo or hi.
1192 */
1193
1194 printf("-------------------------- %s --------------------------\n",
1195 "test rounding of float operators without guard bits");
1196 for (op = FADDS; op <= FDIVS; op++) {
1197 status |= check_single_arithmetic_op(op);
1198 }
1199
1200 printf("-------------------------- %s --------------------------\n",
1201 "test rounding of float operators with guard bits");
1202 for (op = FADDS; op <= FNMSUBS; op++) {
1203 status |= check_single_guarded_arithmetic_op(op);
1204 }
1205
1206 printf("-------------------------- %s --------------------------\n",
1207 "test rounding of double operators with guard bits");
1208 for (op = FADD; op <= FSQRT; op++) {
1209 status |= check_double_guarded_arithmetic_op(op);
1210 }
1211 return status;
1212 }
1213
1214
1215 int
main()1216 main()
1217 {
1218 int status = 0;
1219
1220 init();
1221
1222 status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1223 status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1224 status |= test_int_to_float_convert("test (float)int convert");
1225 status |= test_int_to_float_convert("test (float)int convert");
1226
1227 #ifdef __powerpc64__
1228 status |= test_long_to_double_convert("test (double)long convert");
1229 #endif
1230 status |= test_float_arithmetic_ops();
1231 return status;
1232 }
1233