1 
#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
5 
/* Integer aliases used throughout this file.  UInt is assumed to be 32
   bits wide and ULong 64 bits wide; the size asserts elsewhere in the
   file rely on that. */
typedef unsigned int       UInt;
typedef unsigned long long ULong;

/* Everything captured from one execution of an instruction under test. */
typedef struct {
   double fres;    /* value the instruction left in its result register */
   UInt   cr;      /* condition register, read after the instruction    */
   UInt   fpscr;   /* FPSCR word, read after the instruction            */
} Result;
16 
17 
/* Select round-to-nearest by clearing FPSCR bits 30 and 31 (the two
   rounding-control bits). */
static void set_NEAREST ( void )
{
   __asm__ __volatile__(
      "mtfsb0 30 ; "   /* bit 30 := 0 */
      "mtfsb0 31"      /* bit 31 := 0 */
   );
}
/* Select round-toward-zero: FPSCR bit 30 cleared, bit 31 set. */
static void set_ZERO ( void )
{
   __asm__ __volatile__(
      "mtfsb0 30 ; "   /* bit 30 := 0 */
      "mtfsb1 31"      /* bit 31 := 1 */
   );
}
/* Select round-toward-plus-infinity: FPSCR bit 30 set, bit 31 cleared. */
static void set_PosINF ( void )
{
   __asm__ __volatile__(
      "mtfsb1 30 ; "   /* bit 30 := 1 */
      "mtfsb0 31"      /* bit 31 := 0 */
   );
}
/* Select round-toward-minus-infinity by setting FPSCR bits 30 and 31. */
static void set_NegINF ( void )
{
   __asm__ __volatile__(
      "mtfsb1 30 ; "   /* bit 30 := 1 */
      "mtfsb1 31"      /* bit 31 := 1 */
   );
}
30 
31 
32 static ULong double_as_ULong ( double d )
33 {
34    union { double dd; ULong ll; } u;
35    assert(sizeof(u) == 8);
36    u.dd = d;
37    return u.ll;
38 }
39 
40 static ULong round_with_mask ( ULong x, ULong mask )
41 {
42   if (mask == 1) {
43      switch (x & 1) {
44         case 0:  return x;
45         case 1:  return x+1;
46      }
47      assert(0);
48   }
49 
50   if (mask == 3) {
51      switch (x & 3) {
52         case 0:  return x;
53         case 1:  return x-1;
54         case 2:  return x+2;
55         case 3:  return x+1;
56      }
57      assert(0);
58   }
59 
60   if (mask == 7) {
61      switch (x & 7) {
62         case 0:  return x;
63         case 1:  return x-1;
64         case 2:  return x-2;
65         case 3:  return x-3;
66         case 4:  return x+4;
67         case 5:  return x+3;
68         case 6:  return x+2;
69         case 7:  return x+1;
70      }
71      assert(0);
72   }
73 
74   if (mask == 15) {
75      switch (x & 15) {
76         case 0:  return x;
77         case 1:  return x-1;
78         case 2:  return x-2;
79         case 3:  return x-3;
80         case 4:  return x-4;
81         case 5:  return x-5;
82         case 6:  return x-6;
83         case 7:  return x-7;
84         case 8:   return x+8;
85         case 9:   return x+7;
86         case 10:  return x+6;
87         case 11:  return x+5;
88         case 12:  return x+4;
89         case 13:  return x+3;
90         case 14:  return x+2;
91         case 15:  return x+1;
92      }
93      assert(0);
94   }
95 
96   assert(0);
97 }
98 
99 static void showResult ( Result r, ULong hideMask )
100 {
101   /* hidemask should have 1 for every result bit we **don't**
102      want to show.  viz should be all zeroes normally. */
103 #if 0
104   printf("(%016llx cr1 0x%01x fprf 0x%02x)",
105          double_as_ULong(r.fres) & ~hidemask,
106          (r.cr >> 24) & 0xF, (r.fpscr >> 12) & 0x1F);
107 #else
108   printf("(%016llx cr1 ... fprf ...)",
109 	 (hideMask == 0x1 || hideMask == 0x3 || hideMask == 0x7)
110 	 ? round_with_mask( double_as_ULong(r.fres), hideMask )
111          : double_as_ULong(r.fres) & ~hideMask
112         );
113 #endif
114 }
115 
116 
/* INSN(name, insn) defines
      static Result insn_<name> ( double arg1, double arg2, double arg3 )
   which executes one floating-point instruction and reports what it did.

   `insn` is the instruction as an inline-asm template string, such as
   "fmadd %%f4, %%f1,%%f2,%%f3".  Its operands are supplied in f1, f2 and
   f3 (an instruction with fewer operands just ignores the extras) and it
   must leave its result in f4.

   The wrapper stages everything in a 40-byte scratch struct that the asm
   addresses by fixed byte offsets, so the field order and sizes must not
   change (the assert checks the overall size):
       0, 8, 16   arguments, loaded into f1, f2, f3
       24         result, stored from f4 straight after the instruction
       32         FPSCR word, fetched into f4 with mffs and stored with
                  stfiwx
       36         condition register, fetched into r31 with mfcr
   The base register is advanced by 32 and then moved back around the
   stfiwx, which takes its address from registers only (r0 in that
   position reads as zero). */
#define INSN(name,insn)                                                 \
  static Result insn_##name ( double arg1, double arg2, double arg3 )   \
  {                                                                     \
     struct {                                                           \
        double a1;            /* offset 0  */                           \
        double a2;            /* offset 8  */                           \
        double a3;            /* offset 16 */                           \
        double res;           /* offset 24 */                           \
        UInt   fpscr_after;   /* offset 32 */                           \
        UInt   cr_after;      /* offset 36 */                           \
     } frame;                                                           \
     Result outcome;                                                    \
     assert(sizeof(frame) == 40);                                       \
     frame.a1          = arg1;                                          \
     frame.a2          = arg2;                                          \
     frame.a3          = arg3;                                          \
     frame.res         = 0;                                             \
     frame.fpscr_after = 0;                                             \
     frame.cr_after    = 0;                                             \
     __asm__ __volatile__(                                              \
       "lfd  %%f1, 0(%0)\n\t"  /* a1 */                                 \
       "lfd  %%f2, 8(%0)\n\t"  /* a2 */                                 \
       "lfd  %%f3, 16(%0)\n\t" /* a3 */                                 \
       insn "\n\t"                                                      \
       "stfd %%f4, 24(%0)\n\t" /* res */                                \
       "mffs %%f4\n\t"                                                  \
       "addi %0,%0,32\n\t"                                              \
       "stfiwx %%f4, %%r0,%0\n\t"  /* fpscr_after.  r0 reads as zero */ \
       "addi %0,%0,-32\n\t"                                             \
       "mfcr %%r31\n\t"                                                 \
       "stw %%r31, 36(%0)"  /* cr_after */                              \
       : /*out*/                                                        \
       : /*in*/ "b" (&frame.a1)                                         \
       : /*trash*/ "memory","cc", "fr1","fr2","fr3","fr4", "r31"        \
     );                                                                 \
     outcome.fres  = frame.res;                                         \
     outcome.cr    = frame.cr_after;                                    \
     outcome.fpscr = frame.fpscr_after;                                 \
     return outcome;                                                    \
  }
160 
161 INSN(fabs,     "fabs     %%f4, %%f1");
162 INSN(fabs_,    "fabs.    %%f4, %%f1");
163 
164 INSN(fnabs,    "fnabs    %%f4, %%f1");
165 INSN(fnabs_,   "fnabs.   %%f4, %%f1");
166 
167 INSN(fadd,     "fadd     %%f4, %%f1,%%f2");
168 INSN(fadd_,    "fadd.    %%f4, %%f1,%%f2");
169 
170 INSN(fadds,    "fadds    %%f4, %%f1,%%f2");
171 INSN(fadds_,   "fadds.   %%f4, %%f1,%%f2");
172 
173 INSN(fcfid,    "fcfid    %%f4, %%f1");
174 INSN(fcfid_,   "fcfid.   %%f4, %%f1");
175 
176 INSN(fctid,    "fctid    %%f4, %%f1");
177 INSN(fctid_,   "fctid.   %%f4, %%f1");
178 
179 INSN(fctidz,   "fctidz   %%f4, %%f1");
180 INSN(fctidz_,  "fctidz.  %%f4, %%f1");
181 
182 INSN(fctiw,    "fctiw    %%f4, %%f1");
183 INSN(fctiw_,   "fctiw.   %%f4, %%f1");
184 
185 INSN(fctiwz,   "fctiwz   %%f4, %%f1");
186 INSN(fctiwz_,  "fctiwz.  %%f4, %%f1");
187 
188 INSN(fdiv,     "fdiv     %%f4, %%f1,%%f2");
189 INSN(fdiv_,    "fdiv.    %%f4, %%f1,%%f2");
190 
191 INSN(fdivs,    "fdivs    %%f4, %%f1,%%f2");
192 INSN(fdivs_,   "fdivs.   %%f4, %%f1,%%f2");
193 
194 INSN(fmadd,    "fmadd    %%f4, %%f1,%%f2,%%f3");
195 INSN(fmadd_,   "fmadd.   %%f4, %%f1,%%f2,%%f3");
196 
197 INSN(fmadds,   "fmadds   %%f4, %%f1,%%f2,%%f3");
198 INSN(fmadds_,  "fmadds.  %%f4, %%f1,%%f2,%%f3");
199 
200 INSN(fmr,      "fmr      %%f4, %%f1");
201 INSN(fmr_,     "fmr.     %%f4, %%f1");
202 
203 INSN(fmsub,    "fmsub    %%f4, %%f1,%%f2,%%f3");
204 INSN(fmsub_,   "fmsub.   %%f4, %%f1,%%f2,%%f3");
205 
206 INSN(fmsubs,   "fmsubs   %%f4, %%f1,%%f2,%%f3");
207 INSN(fmsubs_,  "fmsubs.  %%f4, %%f1,%%f2,%%f3");
208 
209 INSN(fmul,     "fmul     %%f4, %%f1,%%f2");
210 INSN(fmul_,    "fmul.    %%f4, %%f1,%%f2");
211 
212 INSN(fmuls,    "fmuls    %%f4, %%f1,%%f2");
213 INSN(fmuls_,   "fmuls.   %%f4, %%f1,%%f2");
214 
215 INSN(fneg,     "fneg     %%f4, %%f1");
216 INSN(fneg_,    "fneg.    %%f4, %%f1");
217 
218 INSN(fnmadd,   "fnmadd   %%f4, %%f1,%%f2,%%f3");
219 INSN(fnmadd_,  "fnmadd.  %%f4, %%f1,%%f2,%%f3");
220 
221 INSN(fnmadds,  "fnmadds  %%f4, %%f1,%%f2,%%f3");
222 INSN(fnmadds_, "fnmadds. %%f4, %%f1,%%f2,%%f3");
223 
224 INSN(fnmsub,   "fnmsub   %%f4, %%f1,%%f2,%%f3");
225 INSN(fnmsub_,  "fnmsub.  %%f4, %%f1,%%f2,%%f3");
226 
227 INSN(fnmsubs,  "fnmsubs  %%f4, %%f1,%%f2,%%f3");
228 INSN(fnmsubs_, "fnmsubs. %%f4, %%f1,%%f2,%%f3");
229 
230 INSN(fre,      "fre      %%f4, %%f1");
231 INSN(fre_,     "fre.     %%f4, %%f1");
232 
233 INSN(fres,     "fres     %%f4, %%f1");
234 INSN(fres_,    "fres.    %%f4, %%f1");
235 
236 INSN(frsqrte,  "frsqrte  %%f4, %%f1");
237 INSN(frsqrte_, "frsqrte. %%f4, %%f1");
238 
239 //INSN(frsqrtes, "frsqrtes %%f4, %%f1");
240 //INSN(frsqrtes_, "frsqrtes. %%f4, %%f1");
241 
242 INSN(frsp,     "frsp     %%f4, %%f1");
243 INSN(frsp_,    "frsp.    %%f4, %%f1");
244 
245 INSN(fsel,     "fsel     %%f4, %%f1,%%f2,%%f3");
246 INSN(fsel_,    "fsel.    %%f4, %%f1,%%f2,%%f3");
247 
248 INSN(fsqrt,    "fsqrt    %%f4, %%f1");
249 INSN(fsqrt_,   "fsqrt.   %%f4, %%f1");
250 
251 INSN(fsqrts,   "fsqrts   %%f4, %%f1");
252 INSN(fsqrts_,  "fsqrts.  %%f4, %%f1");
253 
254 INSN(fsub,     "fsub     %%f4, %%f1,%%f2");
255 INSN(fsub_,    "fsub.    %%f4, %%f1,%%f2");
256 
257 INSN(fsubs,    "fsubs    %%f4, %%f1,%%f2");
258 INSN(fsubs_,   "fsubs.   %%f4, %%f1,%%f2");
259 
260 
261 
262 void do_1_unary ( char* name,
263                   Result(*f)(double,double,double),
264                   double a1,
265                   ULong hideMask )
266 {
267    Result r;
268    printf("%8s: %016llx (%e)\n", name, double_as_ULong(a1), a1);
269    set_NEAREST();
270    r = f(a1, 0.0,0.0);
271    printf("        near "); showResult(r,hideMask); printf("\n");
272    set_ZERO();
273    r = f(a1, 0.0,0.0);
274    printf("        zero "); showResult(r,hideMask); printf("\n");
275    set_PosINF();
276    r = f(a1, 0.0,0.0);
277    printf("        +inf "); showResult(r,hideMask); printf("\n");
278    set_NegINF();
279    r = f(a1, 0.0,0.0);
280    printf("        -inf "); showResult(r,hideMask); printf("\n");
281 }
282 
283 void do_1_binary ( char* name,
284                    Result(*f)(double,double,double),
285                    double a1, double a2,
286                    ULong hideMask )
287 {
288    Result r;
289    printf("%8s: %016llx %016llx\n", name, double_as_ULong(a1),
290 	  double_as_ULong(a2));
291    set_NEAREST();
292    r = f(a1,a2, 0.0);
293    printf("        near "); showResult(r,hideMask); printf("\n");
294    set_ZERO();
295    r = f(a1,a2, 0.0);
296    printf("        zero "); showResult(r,hideMask); printf("\n");
297    set_PosINF();
298    r = f(a1,a2, 0.0);
299    printf("        +inf "); showResult(r,hideMask); printf("\n");
300    set_NegINF();
301    r = f(a1,a2, 0.0);
302    printf("        -inf "); showResult(r,hideMask); printf("\n");
303 }
304 
305 void do_1_ternary ( char* name,
306                     Result(*f)(double,double,double),
307                     double a1, double a2, double a3,
308                     ULong hideMask )
309 {
310    Result r;
311    printf("%8s: %016llx %016llx %016llx\n",
312           name, double_as_ULong(a1),
313 	  double_as_ULong(a2), double_as_ULong(a3));
314    set_NEAREST();
315    r = f(a1,a2,a3);
316    printf("        near "); showResult(r,hideMask); printf("\n");
317    set_ZERO();
318    r = f(a1,a2,a3);
319    printf("        zero "); showResult(r,hideMask); printf("\n");
320    set_PosINF();
321    r = f(a1,a2,a3);
322    printf("        +inf "); showResult(r,hideMask); printf("\n");
323    set_NegINF();
324    r = f(a1,a2,a3);
325    printf("        -inf "); showResult(r,hideMask); printf("\n");
326 }
327 
328 void do_N_unary ( char* name,
329                   Result(*f)(double,double,double),
330                   double* args,
331 		  int nargs,
332                   ULong hideMask )
333 {
334    int i;
335    for (i = 0; i < nargs; i++) {
336       do_1_unary( name, f, args[i], hideMask );
337    }
338 }
339 
340 void do_N_binary ( char* name,
341                    Result(*f)(double,double,double),
342                    double* args,
343                    int nargs,
344                    ULong hideMask )
345 {
346    int i, j;
347    for (i = 0; i < nargs; i++) {
348       for (j = 0; j < nargs; j++) {
349          do_1_binary( name, f, args[i], args[j], hideMask );
350       }
351    }
352 }
353 
354 void do_N_ternary ( char* name,
355                     Result(*f)(double,double,double),
356                     double* args,
357                     int nargs,
358                     ULong hideMask )
359 {
360    int i, j, k;
361    for (i = 0; i < nargs; i++) {
362       for (j = 0; j < nargs; j++) {
363          for (k = 0; k < nargs; k++) {
364             do_1_ternary( name, f, args[i], args[j], args[k], hideMask );
365          }
366       }
367    }
368 }
369 
370 int main ( void )
371 {
372   const ULong SHOW_ALL = 0;
373 
374   int     nargs    = 21;
375   int     nMacArgs = 11;
376 
377   double* args    = malloc(nargs * sizeof(double));
378   double* macArgs = malloc(nMacArgs * sizeof(double));
379 
380   args[0]  =  0.0;
381   args[1]  =  1.0 / 0.0; // inf
382   args[2]  = -args[1]; //  -inf
383   args[3]  = args[2]/args[2]; // nan
384   args[4]  = -args[3]; // -nan
385   args[5]  = -5e100;
386   args[6]  = -5e20;
387   args[7]  = -501.0;
388   args[8]  = -6.0;
389   args[9]  = -1.0;
390   args[10] = -2e-20;
391   args[11] = -2e-200;
392   args[12] =  2e-200;
393   args[13] =  2e-20;
394   args[14] =  1.0;
395   args[15] =  6.0;
396   args[16] =  501.0;
397   args[17] =  5e20;
398   args[18] =  5e100;
399   args[19] =  1.23e+5;
400   args[20] =  1.23e+14;
401 
402 #if 0
403   macArgs[0]  =  0.0;
404   macArgs[1]  = -5e100;
405   macArgs[2]  = -5e20;
406   macArgs[3]  = -501.0;
407   macArgs[4]  = -6.0;
408   macArgs[5]  = -1.0;
409   macArgs[6]  = -2e-20;
410   macArgs[7]  = -2e-200;
411   macArgs[8]  =  2e-200;
412   macArgs[9]  =  2e-20;
413   macArgs[10] =  1.0;
414   macArgs[11] =  6.0;
415   macArgs[12] =  501.0;
416   macArgs[13] =  5e20;
417   macArgs[14] =  5e100;
418   macArgs[15] =  1.23e+5;
419   macArgs[16] =  1.23e+14;
420 
421   //macArgs[17]  = args[3]; // nan
422   //macArgs[18]  = -args[3]; // -nan
423 #endif
424 
425   macArgs[0]  = 0.0;
426   macArgs[1]  = 1.0;
427   macArgs[2]  = 1.0 + (1.0/7.0);
428   macArgs[3]  = 6.01;
429   macArgs[4]  = 501.0;
430   macArgs[5]  = 31415927.0;
431   macArgs[6]  = - 1.0;
432   macArgs[7]  = - (1.0 + (1.0/7.0));
433   macArgs[8]  = - 6.01;
434   macArgs[9]  = - 501.0;
435   macArgs[10] = - 31415927.0;
436 
437 
438   do_N_unary("fmr",     insn_fmr,    args, nargs, SHOW_ALL);
439   do_N_unary("fmr_",    insn_fmr_,   args, nargs, SHOW_ALL);
440 
441   do_N_unary("fneg",    insn_fneg,   args, nargs, SHOW_ALL);
442   do_N_unary("fneg_",   insn_fneg_,  args, nargs, SHOW_ALL);
443 
444   do_N_unary("fabs",    insn_fabs,   args, nargs, SHOW_ALL);
445   do_N_unary("fabs_",   insn_fabs_,  args, nargs, SHOW_ALL);
446 
447   do_N_unary("fnabs",   insn_fnabs,  args, nargs, SHOW_ALL);
448   do_N_unary("fnabs_",  insn_fnabs_, args, nargs, SHOW_ALL);
449 
450 
451   do_N_binary("fadd",   insn_fadd,   args, nargs, SHOW_ALL);
452   do_N_binary("fadd_",  insn_fadd_,  args, nargs, SHOW_ALL);
453 
454   do_N_binary("fadds",  insn_fadds,  args, nargs, SHOW_ALL);
455   do_N_binary("fadds_", insn_fadds_, args, nargs, SHOW_ALL);
456 
457   do_N_binary("fdiv",   insn_fdiv,   args, nargs, SHOW_ALL);
458   do_N_binary("fdiv_",  insn_fdiv_,  args, nargs, SHOW_ALL);
459 
460   do_N_binary("fdivs",  insn_fdivs,  args, nargs, SHOW_ALL);
461   do_N_binary("fdivs_", insn_fdivs_, args, nargs, SHOW_ALL);
462 
463   do_N_binary("fmul",   insn_fmul,   args, nargs, SHOW_ALL);
464   do_N_binary("fmul_",  insn_fmul_,  args, nargs, SHOW_ALL);
465 
466   do_N_binary("fmuls",  insn_fmuls,  args, nargs, SHOW_ALL);
467   do_N_binary("fmuls_", insn_fmuls_, args, nargs, SHOW_ALL);
468 
469   do_N_binary("fsub",   insn_fsub,   args, nargs, SHOW_ALL);
470   do_N_binary("fsub_",  insn_fsub_,  args, nargs, SHOW_ALL);
471 
472   do_N_binary("fsubs",  insn_fsubs,  args, nargs, SHOW_ALL);
473   do_N_binary("fsubs_", insn_fsubs_, args, nargs, SHOW_ALL);
474 
475   //do_N_unary(fcfid, SHOW_ALL);
476   //do_N_unary(fcfid_, SHOW_ALL);
477 
478   //do_N_unary(fctid, SHOW_ALL);
479   //do_N_unary(fctid_, SHOW_ALL);
480 
481   //do_N_unary(fctidz, SHOW_ALL);
482   //do_N_unary(fctidz_, SHOW_ALL);
483 
484   do_N_unary("fctiw",  insn_fctiw,  args, nargs, 0xFFFFFFFF00000000ULL);
485   do_N_unary("fctiw_", insn_fctiw_, args, nargs, 0xFFFFFFFF00000000ULL);
486 
487   do_N_unary("fctiwz",  insn_fctiwz,  args, nargs, 0xFFFFFFFF00000000ULL);
488   do_N_unary("fctiwz_", insn_fctiwz_, args, nargs, 0xFFFFFFFF00000000ULL);
489 
490   do_N_ternary("fmadd",    insn_fmadd,    macArgs, nMacArgs, SHOW_ALL);
491   do_N_ternary("fmadd_",   insn_fmadd_,   macArgs, nMacArgs, SHOW_ALL);
492 
493   do_N_ternary("fmadds",   insn_fmadds,   macArgs, nMacArgs, SHOW_ALL);
494   do_N_ternary("fmadds_",  insn_fmadds_,  macArgs, nMacArgs, SHOW_ALL);
495 
496   do_N_ternary("fmsub",    insn_fmsub,    macArgs, nMacArgs, SHOW_ALL);
497   do_N_ternary("fmsub_",   insn_fmsub_,   macArgs, nMacArgs, SHOW_ALL);
498 
499   do_N_ternary("fmsubs",   insn_fmsubs,   macArgs, nMacArgs, SHOW_ALL);
500   do_N_ternary("fmsubs_",  insn_fmsubs_,  macArgs, nMacArgs, SHOW_ALL);
501 
502   do_N_ternary("fnmadd",   insn_fnmadd,   macArgs, nMacArgs, SHOW_ALL);
503   do_N_ternary("fnmadd_",  insn_fnmadd_,  macArgs, nMacArgs, SHOW_ALL);
504 
505   do_N_ternary("fnmadds",  insn_fnmadds,  macArgs, nMacArgs, SHOW_ALL);
506   do_N_ternary("fnmadds_", insn_fnmadds_, macArgs, nMacArgs, SHOW_ALL);
507 
508   do_N_ternary("fnmsub",   insn_fnmsub,   macArgs, nMacArgs, SHOW_ALL);
509   do_N_ternary("fnmsub_",  insn_fnmsub_,  macArgs, nMacArgs, SHOW_ALL);
510 
511   do_N_ternary("fnmsubs",  insn_fnmsubs,  macArgs, nMacArgs, SHOW_ALL);
512   do_N_ternary("fnmsubs_", insn_fnmsubs_, macArgs, nMacArgs, SHOW_ALL);
513 
514   //do_N_unary(fre, SHOW_ALL);
515   //do_N_unary(fre_, SHOW_ALL);
516 
517   do_N_unary("fres",  insn_fres,  args, nargs, 0x000001FFFFFFFFFFULL);
518   do_N_unary("fres_", insn_fres_, args, nargs, 0x000001FFFFFFFFFFULL);
519 
520   do_N_unary("frsqrte",  insn_frsqrte,  args, nargs, SHOW_ALL);
521   do_N_unary("frsqrte_", insn_frsqrte_, args, nargs, SHOW_ALL);
522 
523   // do_N_unary("frsqrtes",  insn_frsqrtes,  args, nargs, SHOW_ALL);
524   // do_N_unary("frsqrtes_", insn_frsqrtes_, args, nargs, SHOW_ALL);
525 
526   do_N_unary("frsp",  insn_frsp,  args, nargs, SHOW_ALL);
527   do_N_unary("frsp_", insn_frsp_, args, nargs, SHOW_ALL);
528 
529   do_N_ternary("fsel",  insn_fsel,  args, nargs, SHOW_ALL);
530   do_N_ternary("fsel_", insn_fsel_, args, nargs, SHOW_ALL);
531 
532   //do_N_unary(fsqrt, SHOW_ALL);
533   //do_N_unary(fsqrt_, SHOW_ALL);
534 
535   //do_N_unary(fsqrts, SHOW_ALL);
536   //do_N_unary(fsqrts_, SHOW_ALL);
537 
538   return 0;
539 }
540