1 
2 #ifndef USED_AS_INCLUDE
3 
4 #include "../pub/libvex_basictypes.h"
5 #include <stdio.h>
6 #include <malloc.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <assert.h>
10 
11 
12 /* Test program for developing code for conversions between
13    x87 64-bit and 80-bit floats.
14 
15    80-bit format exists only for x86/x86-64, and so the routines
16    hardwire it as little-endian.  The 64-bit format (IEEE double)
17    could exist on any platform, little or big-endian and so we
18    have to take that into account.  IOW, these routines have to
19    work correctly when compiled on both big- and little-endian
20    targets, but the 80-bit images only ever have to exist in
21    little-endian format.
22 */
23 static void show_f80 ( UChar* );
24 static void show_f64 ( UChar* );
25 
26 static inline
read_bit_array(UChar * arr,UInt n)27 UInt read_bit_array ( UChar* arr, UInt n )
28 {
29    UChar c = arr[n >> 3];
30    c >>= (n&7);
31    return c & 1;
32 }
33 
34 static inline
write_bit_array(UChar * arr,UInt n,UInt b)35 void write_bit_array ( UChar* arr, UInt n, UInt b )
36 {
37    UChar c = arr[n >> 3];
38    c &= ~(1 << (n&7));
39    c |= ((b&1) << (n&7));
40    arr[n >> 3] = c;
41 }
42 
43 
convert_f80le_to_f64le_HW(UChar * f80,UChar * f64)44 static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
45 {
46   asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
47                 :
48                 : "r" (&f80[0]), "r" (&f64[0])
49                 : "memory" );
50 }
51 
convert_f64le_to_f80le_HW(UChar * f64,UChar * f80)52 static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
53 {
54   asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
55                 :
56                 : "r" (&f64[0]), "r" (&f80[0])
57                 : "memory" );
58 }
59 
60 #endif /* ndef USED_AS_INCLUDE */
61 
62 
63 
64 /* 80 and 64-bit floating point formats:
65 
66    80-bit:
67 
68     S  0       0-------0      zero
69     S  0       0X------X      denormals
70     S  1-7FFE  1X------X      normals (all normals have leading 1)
71     S  7FFF    10------0      infinity
72     S  7FFF    10X-----X      snan
73     S  7FFF    11X-----X      qnan
74 
75    S is the sign bit.  For runs X----X, at least one of the Xs must be
76    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
77    there is an explicitly represented leading 1, and a sign bit,
78    giving 80 in total.
79 
80    64-bit avoids the confusion of an explicitly represented leading 1
81    and so is simpler:
82 
83     S  0      0------0   zero
84     S  0      X------X   denormals
85     S  1-7FE  any        normals
86     S  7FF    0------0   infinity
87     S  7FF    0X-----X   snan
88     S  7FF    1X-----X   qnan
89 
90    Exponent is 11 bits, fractional part is 52 bits, and there is a
91    sign bit, giving 64 in total.
92 */
93 
94 /* Convert a IEEE754 double (64-bit) into an x87 extended double
95    (80-bit), mimicing the hardware fairly closely.  Both numbers are
96    stored little-endian.  Limitations, all of which could be fixed,
97    given some level of hassle:
98 
99    * Identity of NaNs is not preserved.
100 
101    See comments in the code for more details.
102 */
convert_f64le_to_f80le(UChar * f64,UChar * f80)103 static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
104 {
105    Bool  mantissaIsZero;
106    Int   bexp, i, j, shift;
107    UChar sign;
108 
109    sign = toUChar( (f64[7] >> 7) & 1 );
110    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
111    bexp &= 0x7FF;
112 
113    mantissaIsZero = False;
114    if (bexp == 0 || bexp == 0x7FF) {
115       /* We'll need to know whether or not the mantissa (bits 51:0) is
116          all zeroes in order to handle these cases.  So figure it
117          out. */
118       mantissaIsZero
119          = toBool(
120               (f64[6] & 0x0F) == 0
121               && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
122               && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
123            );
124    }
125 
126    /* If the exponent is zero, either we have a zero or a denormal.
127       Produce a zero.  This is a hack in that it forces denormals to
128       zero.  Could do better. */
129    if (bexp == 0) {
130       f80[9] = toUChar( sign << 7 );
131       f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
132              = f80[3] = f80[2] = f80[1] = f80[0] = 0;
133 
134       if (mantissaIsZero)
135          /* It really is zero, so that's all we can do. */
136          return;
137 
138       /* There is at least one 1-bit in the mantissa.  So it's a
139          potentially denormalised double -- but we can produce a
140          normalised long double.  Count the leading zeroes in the
141          mantissa so as to decide how much to bump the exponent down
142          by.  Note, this is SLOW. */
143       shift = 0;
144       for (i = 51; i >= 0; i--) {
145         if (read_bit_array(f64, i))
146            break;
147         shift++;
148       }
149 
150       /* and copy into place as many bits as we can get our hands on. */
151       j = 63;
152       for (i = 51 - shift; i >= 0; i--) {
153          write_bit_array( f80, j,
154      	 read_bit_array( f64, i ) );
155          j--;
156       }
157 
158       /* Set the exponent appropriately, and we're done. */
159       bexp -= shift;
160       bexp += (16383 - 1023);
161       f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
162       f80[8] = toUChar( bexp & 0xFF );
163       return;
164    }
165 
166    /* If the exponent is 7FF, this is either an Infinity, a SNaN or
167       QNaN, as determined by examining bits 51:0, thus:
168           0  ... 0    Inf
169           0X ... X    SNaN
170           1X ... X    QNaN
171       where at least one of the Xs is not zero.
172    */
173    if (bexp == 0x7FF) {
174       if (mantissaIsZero) {
175          /* Produce an appropriately signed infinity:
176             S 1--1 (15)  1  0--0 (63)
177          */
178          f80[9] = toUChar( (sign << 7) | 0x7F );
179          f80[8] = 0xFF;
180          f80[7] = 0x80;
181          f80[6] = f80[5] = f80[4] = f80[3]
182                 = f80[2] = f80[1] = f80[0] = 0;
183          return;
184       }
185       /* So it's either a QNaN or SNaN.  Distinguish by considering
186          bit 51.  Note, this destroys all the trailing bits
187          (identity?) of the NaN.  IEEE754 doesn't require preserving
188          these (it only requires that there be one QNaN value and one
189          SNaN value), but x87 does seem to have some ability to
190          preserve them.  Anyway, here, the NaN's identity is
191          destroyed.  Could be improved. */
192       if (f64[6] & 8) {
193          /* QNaN.  Make a QNaN:
194             S 1--1 (15)  1  1--1 (63)
195          */
196          f80[9] = toUChar( (sign << 7) | 0x7F );
197          f80[8] = 0xFF;
198          f80[7] = 0xFF;
199          f80[6] = f80[5] = f80[4] = f80[3]
200                 = f80[2] = f80[1] = f80[0] = 0xFF;
201       } else {
202          /* SNaN.  Make a SNaN:
203             S 1--1 (15)  0  1--1 (63)
204          */
205          f80[9] = toUChar( (sign << 7) | 0x7F );
206          f80[8] = 0xFF;
207          f80[7] = 0x7F;
208          f80[6] = f80[5] = f80[4] = f80[3]
209                 = f80[2] = f80[1] = f80[0] = 0xFF;
210       }
211       return;
212    }
213 
214    /* It's not a zero, denormal, infinity or nan.  So it must be a
215       normalised number.  Rebias the exponent and build the new
216       number.  */
217    bexp += (16383 - 1023);
218 
219    f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
220    f80[8] = toUChar( bexp & 0xFF );
221    f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
222                               | ((f64[5] >> 5) & 7) );
223    f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
224    f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
225    f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
226    f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
227    f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
228    f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
229    f80[0] = toUChar( 0 );
230 }
231 
232 
233 /* Convert a x87 extended double (80-bit) into an IEEE 754 double
234    (64-bit), mimicking the hardware fairly closely.  Both numbers are
235    stored little-endian.  Limitations, both of which could be fixed,
236    given some level of hassle:
237 
238    * Rounding following truncation could be a bit better.
239 
240    * Identity of NaNs is not preserved.
241 
242    See comments in the code for more details.
243 */
convert_f80le_to_f64le(UChar * f80,UChar * f64)244 static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
245 {
246    Bool  isInf;
247    Int   bexp, i, j;
248    UChar sign;
249 
250    sign = toUChar((f80[9] >> 7) & 1);
251    bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
252    bexp &= 0x7FFF;
253 
254    /* If the exponent is zero, either we have a zero or a denormal.
255       But an extended precision denormal becomes a double precision
256       zero, so in either case, just produce the appropriately signed
257       zero. */
258    if (bexp == 0) {
259       f64[7] = toUChar(sign << 7);
260       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
261       return;
262    }
263 
264    /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
265       QNaN, as determined by examining bits 62:0, thus:
266           0  ... 0    Inf
267           0X ... X    SNaN
268           1X ... X    QNaN
269       where at least one of the Xs is not zero.
270    */
271    if (bexp == 0x7FFF) {
272       isInf = toBool(
273                  (f80[7] & 0x7F) == 0
274                  && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
275                  && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
276                  && f80[0] == 0
277               );
278       if (isInf) {
279          if (0 == (f80[7] & 0x80))
280             goto wierd_NaN;
281          /* Produce an appropriately signed infinity:
282             S 1--1 (11)  0--0 (52)
283          */
284          f64[7] = toUChar((sign << 7) | 0x7F);
285          f64[6] = 0xF0;
286          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
287          return;
288       }
289       /* So it's either a QNaN or SNaN.  Distinguish by considering
290          bit 62.  Note, this destroys all the trailing bits
291          (identity?) of the NaN.  IEEE754 doesn't require preserving
292          these (it only requires that there be one QNaN value and one
293          SNaN value), but x87 does seem to have some ability to
294          preserve them.  Anyway, here, the NaN's identity is
295          destroyed.  Could be improved. */
296       if (f80[8] & 0x40) {
297          /* QNaN.  Make a QNaN:
298             S 1--1 (11)  1  1--1 (51)
299          */
300          f64[7] = toUChar((sign << 7) | 0x7F);
301          f64[6] = 0xFF;
302          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
303       } else {
304          /* SNaN.  Make a SNaN:
305             S 1--1 (11)  0  1--1 (51)
306          */
307          f64[7] = toUChar((sign << 7) | 0x7F);
308          f64[6] = 0xF7;
309          f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
310       }
311       return;
312    }
313 
314    /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
315       zero, the x87 FPU appears to consider the number denormalised
316       and converts it to a QNaN. */
317    if (0 == (f80[7] & 0x80)) {
318       wierd_NaN:
319       /* Strange hardware QNaN:
320          S 1--1 (11)  1  0--0 (51)
321       */
322       /* On a PIII, these QNaNs always appear with sign==1.  I have
323          no idea why. */
324       f64[7] = (1 /*sign*/ << 7) | 0x7F;
325       f64[6] = 0xF8;
326       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
327       return;
328    }
329 
330    /* It's not a zero, denormal, infinity or nan.  So it must be a
331       normalised number.  Rebias the exponent and consider. */
332    bexp -= (16383 - 1023);
333    if (bexp >= 0x7FF) {
334       /* It's too big for a double.  Construct an infinity. */
335       f64[7] = toUChar((sign << 7) | 0x7F);
336       f64[6] = 0xF0;
337       f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
338       return;
339    }
340 
341    if (bexp <= 0) {
342       /* It's too small for a normalised double.  First construct a
343          zero and then see if it can be improved into a denormal.  */
344       f64[7] = toUChar(sign << 7);
345       f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
346 
347       if (bexp < -52)
348          /* Too small even for a denormal. */
349          return;
350 
351       /* Ok, let's make a denormal.  Note, this is SLOW. */
352       /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
353          indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
354       /* bexp is in range -52 .. 0 inclusive */
355       for (i = 63; i >= 0; i--) {
356          j = i - 12 + bexp;
357          if (j < 0) break;
358          /* We shouldn't really call vassert from generated code. */
359          assert(j >= 0 && j < 52);
360          write_bit_array ( f64,
361                            j,
362                            read_bit_array ( f80, i ) );
363       }
364       /* and now we might have to round ... */
365       if (read_bit_array(f80, 10+1 - bexp) == 1)
366          goto do_rounding;
367 
368       return;
369    }
370 
371    /* Ok, it's a normalised number which is representable as a double.
372       Copy the exponent and mantissa into place. */
373    /*
374    for (i = 0; i < 52; i++)
375       write_bit_array ( f64,
376                         i,
377                         read_bit_array ( f80, i+11 ) );
378    */
379    f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
380    f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
381    f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
382    f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
383    f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
384    f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
385 
386    f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
387 
388    f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
389 
390    /* Now consider any rounding that needs to happen as a result of
391       truncating the mantissa. */
392    if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
393 
394       /* If the bottom bits of f80 are "100 0000 0000", then the
395          infinitely precise value is deemed to be mid-way between the
396          two closest representable values.  Since we're doing
397          round-to-nearest (the default mode), in that case it is the
398          bit immediately above which indicates whether we should round
399          upwards or not -- if 0, we don't.  All that is encapsulated
400          in the following simple test. */
401       if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
402          return;
403 
404       do_rounding:
405       /* Round upwards.  This is a kludge.  Once in every 2^24
406          roundings (statistically) the bottom three bytes are all 0xFF
407          and so we don't round at all.  Could be improved. */
408       if (f64[0] != 0xFF) {
409          f64[0]++;
410       }
411       else
412       if (f64[0] == 0xFF && f64[1] != 0xFF) {
413          f64[0] = 0;
414          f64[1]++;
415       }
416       else
417       if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
418          f64[0] = 0;
419          f64[1] = 0;
420          f64[2]++;
421       }
422       /* else we don't round, but we should. */
423    }
424 }
425 
426 
427 #ifndef USED_AS_INCLUDE
428 
429 //////////////
430 
show_f80(UChar * f80)431 static void show_f80 ( UChar* f80 )
432 {
433   Int i;
434   printf("%d ", read_bit_array(f80, 79));
435 
436   for (i = 78; i >= 64; i--)
437     printf("%d", read_bit_array(f80, i));
438 
439   printf(" %d ", read_bit_array(f80, 63));
440 
441   for (i = 62; i >= 0; i--)
442     printf("%d", read_bit_array(f80, i));
443 }
444 
show_f64le(UChar * f64)445 static void show_f64le ( UChar* f64 )
446 {
447   Int i;
448   printf("%d     ", read_bit_array(f64, 63));
449 
450   for (i = 62; i >= 52; i--)
451     printf("%d", read_bit_array(f64, i));
452 
453   printf("   ");
454   for (i = 51; i >= 0; i--)
455     printf("%d", read_bit_array(f64, i));
456 }
457 
458 //////////////
459 
460 
461 /* Convert f80 to a 64-bit IEEE double using both the hardware and the
462    soft version, and compare the results.  If they differ, print
463    details and return 1.  If they are identical, return 0.
464 */
do_80_to_64_test(Int test_no,UChar * f80,UChar * f64h,UChar * f64s)465 int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
466 {
467    Char buf64s[100], buf64h[100];
468    Bool same;
469    Int k;
470    convert_f80le_to_f64le_HW(f80, f64h);
471    convert_f80le_to_f64le(f80, f64s);
472    same = True;
473    for (k = 0; k < 8; k++) {
474       if (f64s[k] != f64h[k]) {
475          same = False; break;
476       }
477    }
478    /* bitwise identical */
479    if (same)
480       return 0;
481 
482    sprintf(buf64s, "%.16e", *(double*)f64s);
483    sprintf(buf64h, "%.16e", *(double*)f64h);
484 
485    /* Not bitwise identical, but pretty darn close */
486    if (0 == strcmp(buf64s, buf64h))
487       return 0;
488 
489     printf("\n");
490     printf("f80:  "); show_f80(f80); printf("\n");
491     printf("f64h: "); show_f64le(f64h); printf("\n");
492     printf("f64s: "); show_f64le(f64s); printf("\n");
493 
494     printf("[test %d]  %.16Le -> (hw %s, sw %s)\n",
495            test_no, *(long double*)f80,
496            buf64h, buf64s );
497 
498     return 1;
499 }
500 
501 
502 /* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
503    using both the hardware and the soft version, and compare the
504    results.  If they differ, print details and return 1.  If they are
505    identical, return 0.
506 */
do_64_to_80_test(Int test_no,UChar * f64,UChar * f80h,UChar * f80s)507 int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
508 {
509    Char buf80s[100], buf80h[100];
510    Bool same;
511    Int k;
512    convert_f64le_to_f80le_HW(f64, f80h);
513    convert_f64le_to_f80le(f64, f80s);
514    same = True;
515    for (k = 0; k < 10; k++) {
516       if (f80s[k] != f80h[k]) {
517          same = False; break;
518       }
519    }
520    /* bitwise identical */
521    if (same)
522       return 0;
523 
524    sprintf(buf80s, "%.20Le", *(long double*)f80s);
525    sprintf(buf80h, "%.20Le", *(long double*)f80h);
526 
527    /* Not bitwise identical, but pretty darn close */
528    if (0 == strcmp(buf80s, buf80h))
529       return 0;
530 
531     printf("\n");
532     printf("f64:  "); show_f64le(f64); printf("\n");
533     printf("f80h: "); show_f80(f80h); printf("\n");
534     printf("f80s: "); show_f80(f80s); printf("\n");
535 
536     printf("[test %d]  %.16e -> (hw %s, sw %s)\n",
537            test_no, *(double*)f64,
538            buf80h, buf80s );
539 
540     return 1;
541 }
542 
543 
544 
do_80_to_64_tests(void)545 void do_80_to_64_tests ( void )
546 {
547    UInt b9,b8,b7,i, j;
548    Int fails=0, tests=0;
549    UChar* f64h = malloc(8);
550    UChar* f64s = malloc(8);
551    UChar* f80  = malloc(10);
552    int STEP = 1;
553 
554    srandom(4343);
555 
556    /* Ten million random bit patterns */
557    for (i = 0; i < 10000000; i++) {
558      tests++;
559      for (j = 0; j < 10; j++)
560        f80[j] = (random() >> 7) & 255;
561 
562      fails += do_80_to_64_test(tests, f80, f64h, f64s);
563    }
564 
565    /* 2^24 numbers in which the first 24 bits are tested exhaustively
566       -- this covers the sign, exponent and leading part of the
567       mantissa. */
568    for (b9 = 0; b9 < 256; b9 += STEP) {
569       for (b8 = 0; b8 < 256; b8 += STEP) {
570          for (b7 = 0; b7 < 256; b7 += STEP) {
571            tests++;
572             for (i = 0; i < 10; i++)
573                f80[i] = 0;
574             for (i = 0; i < 8; i++)
575                f64h[i] = f64s[i] = 0;
576             f80[9] = b9;
577             f80[8] = b8;
578             f80[7] = b7;
579 
580     fails += do_80_to_64_test(tests, f80, f64h, f64s);
581    }}}
582 
583    printf("\n80 -> 64:  %d tests, %d fails\n\n", tests, fails);
584 }
585 
586 
do_64_to_80_tests(void)587 void do_64_to_80_tests ( void )
588 {
589    UInt b7,b6,b5,i, j;
590    Int fails=0, tests=0;
591    UChar* f80h = malloc(10);
592    UChar* f80s = malloc(10);
593    UChar* f64  = malloc(8);
594    int STEP = 1;
595 
596    srandom(2323);
597 
598    /* Ten million random bit patterns */
599    for (i = 0; i < 10000000; i++) {
600      tests++;
601      for (j = 0; j < 8; j++)
602        f64[j] = (random() >> 13) & 255;
603 
604      fails += do_64_to_80_test(tests, f64, f80h, f80s);
605    }
606 
607    /* 2^24 numbers in which the first 24 bits are tested exhaustively
608       -- this covers the sign, exponent and leading part of the
609       mantissa. */
610    for (b7 = 0; b7 < 256; b7 += STEP) {
611       for (b6 = 0; b6 < 256; b6 += STEP) {
612          for (b5 = 0; b5 < 256; b5 += STEP) {
613            tests++;
614             for (i = 0; i < 8; i++)
615                f64[i] = 0;
616             for (i = 0; i < 10; i++)
617                f80h[i] = f80s[i] = 0;
618             f64[7] = b7;
619             f64[6] = b6;
620             f64[5] = b5;
621 
622     fails += do_64_to_80_test(tests, f64, f80h, f80s);
623    }}}
624 
625    printf("\n64 -> 80:  %d tests, %d fails\n\n", tests, fails);
626 }
627 
628 
main(void)629 int main ( void )
630 {
631    do_80_to_64_tests();
632    do_64_to_80_tests();
633    return 0;
634 }
635 
636 #endif /* ndef USED_AS_INCLUDE */
637