1 /*  Copyright (C) 2011 IBM
2 
3  Author: Maynard Johnson <maynardj@us.ibm.com>
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License as
7  published by the Free Software Foundation; either version 2 of the
8  License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18  02111-1307, USA.
19 
20  The GNU General Public License is contained in the file COPYING.
21  */
22 
23 #ifdef HAS_VSX
24 
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <malloc.h>
30 #include <altivec.h>
31 #include <math.h>
32 #include <unistd.h>    // getopt
33 
/* HWord_t is a 32-bit unsigned integer unless __powerpc64__ is defined,
 * in which case it is a 64-bit unsigned integer.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 when VGP_ppc64le_linux is defined and 0 otherwise. */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif

/* Minimal boolean type with its two values. */
typedef unsigned char Bool;
#define True 1
#define False 0
/* Global register variables bound to r14-r17 and fr14-fr17.
 * NOTE(review): nothing in this part of the file reads or writes them;
 * confirm against the rest of the file before removing.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* NOTE(review): not referenced in this part of the file; presumably hold
 * CR and XER snapshots taken by the divide tests -- verify against callers.
 */
static volatile unsigned int div_flags, div_xer;
59 
/* Clobber list naming every condition register field. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register with mtcr.
 * Note: the expansion already ends in a semicolon.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg to XER with mtxer.
 * Note: the expansion already ends in a semicolon.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval with mfcr (no trailing semicolon). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval with mfxer (no trailing semicolon). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Write zero to the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Write zero to XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Write zero to the condition register and then to XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF and a source register holding 0.0. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
90 
91 
/* Signature shared by the test routines: no arguments, no return value. */
typedef void (*test_func_t)(void);
/* Short name for struct test_table, which is defined further down. */
typedef struct test_table test_table_t;

/* Defines for the instruction groups, use bit field to identify */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST  0x0002
99 /* These functions below that construct a table of floating point
100  * values were lifted from none/tests/ppc32/jm-insns.c.
101  */
102 
/* AB_DPRINTF prints to stderr when DEBUG_ARGS_BUILD is defined and expands
 * to an empty statement otherwise.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit floating point bit pattern and store it at 'farg'.
 *
 *   farg - destination; must point to at least 8 writable bytes
 *          (callers in this file pass the address of a double).
 *   s    - sign, shifted into bit 63.
 *   _exp - biased exponent, shifted into bits 62..52.
 *   mant - fraction, OR-ed into the low bits as given (it is not masked).
 *
 * The pattern is copied with memcpy rather than stored through a casted
 * pointer, so the destination object is never accessed through an
 * incompatible lvalue type.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits;
   double as_double;

   bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(farg, &bits, sizeof bits);

   /* Only needed for the debug trace below. */
   memcpy(&as_double, &bits, sizeof as_double);
   (void)as_double;

   /* uint64_t is not necessarily unsigned long long, so cast for %llx. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, as_double);
}
119 
/* Assemble a 32-bit floating point bit pattern and store it at 'farg'.
 *
 *   farg - destination; must point to at least 4 writable bytes
 *          (callers in this file pass the address of a float).
 *   s    - sign, shifted into bit 31.
 *   _exp - biased exponent, shifted into bits 30..23.
 *   mant - fraction, OR-ed into the low bits as given (it is not masked).
 *
 * The pattern is copied with memcpy rather than stored through a casted
 * pointer, so the destination object is never accessed through an
 * incompatible lvalue type.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits;

   bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   memcpy(farg, &bits, sizeof bits);
}
127 
128 
/* One pair of operand indices for a two-operand floating point test. */
typedef struct fp_test_args {
   int fra_idx;   /* index selecting the first operand  */
   int frb_idx;   /* index selecting the second operand */
} fp_test_args_t;


/* The 68 operand pairs used by the two-operand tests, in execution order.
 * Every index lies in 0..19.
 * NOTE(review): the indices presumably select entries of the special-value
 * table built by build_special_fargs_table() -- confirm against the driver.
 * Rows are grouped eight to a line; a few entries deliberately differ from
 * the pattern of their row and are kept exactly as they were.
 */
fp_test_args_t two_arg_fp_tests[] = {
   /* entries 0..7 */
   {8, 8},   {8, 14},  {15, 16}, {8, 5},   {8, 4},   {8, 7},   {8, 9},   {8, 11},
   /* entries 8..15 */
   {14, 8},  {14, 14}, {14, 6},  {14, 5},  {14, 4},  {14, 7},  {14, 9},  {14, 11},
   /* entries 16..23 */
   {6, 8},   {6, 14},  {6, 6},   {6, 5},   {6, 4},   {6, 7},   {6, 9},   {6, 11},
   /* entries 24..31 */
   {5, 8},   {5, 14},  {5, 6},   {5, 5},   {5, 4},   {5, 7},   {5, 9},   {5, 11},
   /* entries 32..39 */
   {4, 8},   {4, 14},  {4, 6},   {4, 5},   {4, 1},   {4, 7},   {4, 9},   {4, 11},
   /* entries 40..47 */
   {7, 8},   {7, 14},  {7, 6},   {7, 5},   {7, 4},   {7, 7},   {7, 9},   {7, 11},
   /* entries 48..55 */
   {10, 8},  {10, 14}, {12, 6},  {12, 5},  {10, 4},  {10, 7},  {10, 9},  {10, 11},
   /* entries 56..67 */
   {12, 8},  {12, 14}, {12, 6},  {15, 16}, {15, 16}, {9, 11},
   {11, 11}, {11, 12}, {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
205 
206 
/* Number of entries in spec_fargs / spec_sp_fargs; set by
 * build_special_fargs_table().
 */
static int nb_special_fargs;
/* Double-precision special values; NULL until the table has been built. */
static double * spec_fargs;
/* Single-precision counterparts of spec_fargs, same indexing. */
static float * spec_sp_fargs;

/* Allocate and fill spec_fargs and spec_sp_fargs, and set nb_special_fargs.
 *
 * The call is a no-op when spec_fargs is already non-NULL.  If either
 * allocation fails, a message is written to stderr and the program exits
 * with EXIT_FAILURE, since no test can run without the tables.
 */
static void build_special_fargs_table(void)
{
/*
  Entry  Sign Exp   fraction                  Special value
   0      0   3fd   0x8000000000000ULL         Positive finite number
   1      0   404   0xf000000000000ULL         ...
   2      0   001   0x8000000b77501ULL         ...
   3      0   7fe   0x800000000051bULL         ...
   4      0   012   0x3214569900000ULL         ...
   5      0   000   0x0000000000000ULL         +0.0 (+zero)
   6      1   000   0x0000000000000ULL         -0.0 (-zero)
   7      0   7ff   0x0000000000000ULL         +infinity
   8      1   7ff   0x0000000000000ULL         -infinity
   9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
   10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
   11     0   7ff   0x8000000000000ULL         +QNaN
   12     1   7ff   0x8000000000000ULL         -QNaN
   13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
   14     1   40d   0x0650f5a07b353ULL         Negative finite number
   15     0   412   0x32585a9900000ULL         A few more positive finite numbers
   16     0   413   0x82511a2000000ULL         ...
   17     0   403   0x12ef5a9300000ULL         ...
   18     0   405   0x14bf5d2300000ULL         ...
   19     0   409   0x76bf982440000ULL         ...
*/
   static const struct {
      int      s;      /* sign bit                  */
      uint16_t bexp;   /* biased 11-bit exponent    */
      uint64_t mant;   /* 52-bit fraction           */
   } dp_bits[] = {
      { 0, 0x3fd, 0x8000000000000ULL },   /* #0  */
      { 0, 0x404, 0xf000000000000ULL },   /* #1  */
      { 0, 0x001, 0x8000000b77501ULL },   /* #2  */
      { 0, 0x7fe, 0x800000000051bULL },   /* #3  */
      { 0, 0x012, 0x3214569900000ULL },   /* #4  */
      { 0, 0x000, 0x0000000000000ULL },   /* #5  +0.0      */
      { 1, 0x000, 0x0000000000000ULL },   /* #6  -0.0      */
      { 0, 0x7FF, 0x0000000000000ULL },   /* #7  +infinity */
      { 1, 0x7FF, 0x0000000000000ULL },   /* #8  -infinity */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #9  +SNaN     */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #10 -SNaN     */
      { 0, 0x7FF, 0x8000000000000ULL },   /* #11 +QNaN     */
      { 1, 0x7FF, 0x8000000000000ULL },   /* #12 -QNaN     */
      { 1, 0x000, 0x8340000078000ULL },   /* #13 denormalized value     */
      { 1, 0x40d, 0x0650f5a07b353ULL },   /* #14 negative finite number */
      { 0, 0x412, 0x32585a9900000ULL },   /* #15 */
      { 0, 0x413, 0x82511a2000000ULL },   /* #16 */
      { 0, 0x403, 0x12ef5a9300000ULL },   /* #17 */
      { 0, 0x405, 0x14bf5d2300000ULL },   /* #18 */
      { 0, 0x409, 0x76bf982440000ULL }    /* #19 */
   };
   enum { POS_SNAN_IDX = 9, NEG_SNAN_IDX = 10 };
   const int nb_entries = (int) (sizeof dp_bits / sizeof dp_bits[0]);
   double * dp_table;
   float * sp_table;
   int j;

   if (spec_fargs)
      return;

   dp_table = malloc( nb_entries * sizeof *dp_table );
   sp_table = malloc( nb_entries * sizeof *sp_table );
   if (dp_table == NULL || sp_table == NULL) {
      fprintf(stderr, "build_special_fargs_table: out of memory\n");
      free(dp_table);
      free(sp_table);
      exit(EXIT_FAILURE);
   }

   for (j = 0; j < nb_entries; j++) {
      register_farg(&dp_table[j], dp_bits[j].s, dp_bits[j].bexp, dp_bits[j].mant);

      if (j == POS_SNAN_IDX || j == NEG_SNAN_IDX) {
         /*
          * When src is a SNaN, it's converted to a QNaN first before rounding
          * to single-precision, so we can't just copy the double-precision
          * value to the corresponding slot in the single-precision array.
          * Instead, we set the single-precision bits by hand.
          */
         register_sp_farg(&sp_table[j], dp_bits[j].s, 0xff, 0x3FFFFF);
      } else {
         sp_table[j] = dp_table[j];
      }
   }

   /* Publish the tables only once they are completely filled in. */
   spec_fargs = dp_table;
   spec_sp_fargs = sp_table;
   nb_special_fargs = nb_entries;
}
402 
403 
/* One entry of a table of test categories. */
struct test_table
{
   test_func_t test_category;   /* routine to call for this category */
   char * name;                 /* name string for this category */
   unsigned int test_group;     /* NOTE(review): presumably one of the
                                 * instruction-group bits such as
                                 * SCALAR_DIV_INST / OTHER_INST -- confirm
                                 * against the table that uses it. */
};
410 
/*  Type of input for floating point operations: single-precision
 *  (SINGLE_TEST) or double-precision (DOUBLE_TEST).
 */
typedef enum {
   SINGLE_TEST,
   DOUBLE_TEST
} precision_type_t;
416 
/* Category tag stored in each vx_fp_test_t entry.
 * NOTE(review): the code that switches on this value is not visible here;
 * judging by the names it selects how a result is interpreted or printed
 * (word / single / double output, estimate check, or default) -- confirm
 * against the test driver.
 */
typedef enum {
   VX_SCALAR_CONV_TO_WORD,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_ESTIMATE,
   VX_DEFAULT
} vx_fp_test_type;
424 
/* Vector operands shared with the inline-asm test wrappers below: the
 * wrappers read vec_inA and/or vec_inB and write their result to vec_out.
 */
static vector unsigned int vec_out, vec_inA, vec_inB;
426 
427 /* This function is for checking the reciprocal and reciprocal square root
428  * estimate instructions.
429  */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)430 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
431 {
432    /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
433     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
434     * does an actual reciprocal calculation versus estimation, so the answer we get back from
435     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
436     * precision) and the estimate may still be within expected tolerances.  On top of that,
437     * we can't count on these estimates always being the same across implementations.
438     * For example, with the fre[s] instruction (which should be correct to within one part
439     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
440     * one implementation could return 1.0111_1111_0000 and another implementation could return
441     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
442     * single bit in common.
443     *
444     * The upshot is we can't validate the VEX output for these instructions by comparing against
445     * stored bit patterns.  We must check that the result is within expected tolerances.
446     */
447 
448 
449    /* A mask to be used for validation as a last resort.
450     * Only use 12 bits of precision for reasons discussed above.
451     */
452 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
453 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
454 
455    Bool result = False;
456    Bool dp_test = type == DOUBLE_TEST;
457    double src_dp, res_dp;
458    float src_sp, res_sp;
459    src_dp = res_dp = 0;
460    src_sp = res_sp = 0;
461 #define SRC (dp_test ? src_dp : src_sp)
462 #define RES (dp_test ? res_dp : res_sp)
463    Bool src_is_negative = False;
464    Bool res_is_negative = False;
465    unsigned long long * dst_dp = NULL;
466    unsigned int * dst_sp = NULL;
467    if (dp_test) {
468       unsigned long long * src_dp_ull;
469       dst_dp = (unsigned long long *) &vec_out;
470       src_dp = spec_fargs[idx];
471       src_dp_ull = (unsigned long long *) &src_dp;
472       src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
473       res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
474       memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
475    } else {
476       unsigned int * src_sp_uint;
477       dst_sp = (unsigned int *) &vec_out;
478       src_sp = spec_sp_fargs[idx];
479       src_sp_uint = (unsigned int *) &src_sp;
480       src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
481       res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
482       memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
483    }
484 
485    // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
486    if (isnan(SRC))
487       return isnan(RES);
488    if (fpclassify(SRC) == FP_ZERO)
489       return isinf(RES);
490    if (!src_is_negative && isinf(SRC))
491       return !res_is_negative && (fpclassify(RES) == FP_ZERO);
492    if (is_rsqrte) {
493       if (src_is_negative)
494          return isnan(RES);
495    } else {
496       if (src_is_negative && isinf(SRC))
497          return res_is_negative && (fpclassify(RES) == FP_ZERO);
498    }
499    if (dp_test) {
500       double calc_diff;
501       double real_diff;
502       double recip_divisor;
503       double div_result;
504       double calc_diff_tmp;
505 
506       if (is_rsqrte)
507          recip_divisor = sqrt(src_dp);
508       else
509          recip_divisor = src_dp;
510 
511       div_result = 1.0/recip_divisor;
512       calc_diff_tmp = recip_divisor * 16384.0;
513       if (isnormal(calc_diff_tmp)) {
514          calc_diff = fabs(1.0/calc_diff_tmp);
515          real_diff = fabs(res_dp - div_result);
516          result = ( ( res_dp == div_result )
517                   || ( real_diff <= calc_diff ) );
518       } else {
519          /* Unable to compute theoretical difference, so we fall back to masking out
520           * un-precise bits.
521           */
522          unsigned long long * div_result_dp = (unsigned long long *) &div_result;
523          result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
524       }
525       /* For debug use . . .
526          if (!result) {
527              unsigned long long * dv = &div_result;
528              unsigned long long * rd = &real_diff;
529              unsigned long long * cd = &calc_diff;
530              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
531        *dv, *rd, *cd);
532           }
533        */
534    } else {  // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
535       float calc_diff;
536       float real_diff;
537       float div_result;
538       float calc_diff_tmp;
539       float recip_divisor = sqrt(src_sp);
540 
541       div_result = 1.0/recip_divisor;
542       calc_diff_tmp = recip_divisor * 16384.0;
543       if (isnormal(calc_diff_tmp)) {
544          calc_diff = fabsf(1.0/calc_diff_tmp);
545          real_diff = fabsf(res_sp - div_result);
546          result = ( ( res_sp == div_result )
547                   || ( real_diff <= calc_diff ) );
548       } else {
549          /* Unable to compute theoretical difference, so we fall back to masking out
550           * un-precise bits.
551           */
552          unsigned int * div_result_sp = (unsigned int *) &div_result;
553          result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
554       }
555       /* For debug use . . .
556          if (!result) {
557              unsigned long long * dv = &div_result;
558              unsigned long long * rd = &real_diff;
559              unsigned long long * cd = &calc_diff;
560              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
561        *dv, *rd, *cd);
562           }
563        */
564    }
565    return result;
566 }
567 
/* Description of one VSX floating point instruction test, as used by the
 * vsx_one_fp_arg_tests and vx_tdivORtsqrt_tests tables.
 */
typedef struct vx_fp_test
{
   test_func_t test_func;        /* wrapper that executes the instruction */
   const char * name;            /* instruction mnemonic */
   fp_test_args_t * targs;       /* operand index pairs, or NULL when the
                                  * table entry supplies none */
   int num_tests;                /* number of test cases for this entry */
   precision_type_t precision;   /* single or double precision input */
   vx_fp_test_type type;         /* category tag for this test */
   const char * op;              /* short operation label, e.g. "1/x" */
} vx_fp_test_t;
578 
579 
/* NOTE(review): not referenced in this part of the file; presumably selects
 * the record ("dot") form of an instruction -- confirm against its users.
 */
static Bool do_dot;
581 
/* Execute xvredp with vec_inB as the source; the result is written to vec_out. */
static void test_xvredp(void)
{
   __asm__ __volatile__ ("xvredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
586 
/* Execute xsredp with vec_inB as the source; the result is written to vec_out. */
static void test_xsredp(void)
{
   __asm__ __volatile__ ("xsredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
591 
/* Execute xvrsqrtedp with vec_inB as the source; the result is written to vec_out. */
static void test_xvrsqrtedp(void)
{
   __asm__ __volatile__ ("xvrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
596 
/* Execute xsrsqrtedp with vec_inB as the source; the result is written to vec_out. */
static void test_xsrsqrtedp(void)
{
   __asm__ __volatile__ ("xsrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
601 
/* Execute xvrsqrtesp with vec_inB as the source; the result is written to vec_out. */
static void test_xvrsqrtesp(void)
{
   __asm__ __volatile__ ("xvrsqrtesp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
606 
test_xstsqrtdp(void)607 static void test_xstsqrtdp(void)
608 {
609    __asm__ __volatile__ ("xstsqrtdp   cr1, %x0" : : "wa" (vec_inB));
610 }
611 
test_xvtsqrtdp(void)612 static void test_xvtsqrtdp(void)
613 {
614    __asm__ __volatile__ ("xvtsqrtdp   cr1, %x0" : : "wa" (vec_inB));
615 }
616 
test_xvtsqrtsp(void)617 static void test_xvtsqrtsp(void)
618 {
619    __asm__ __volatile__ ("xvtsqrtsp   cr1, %x0" : : "wa" (vec_inB));
620 }
621 
/* Execute xvsqrtdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvsqrtdp(void)
{
   __asm__ __volatile__ ("xvsqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
626 
/* Execute xvsqrtsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvsqrtsp(void)
{
   __asm__ __volatile__ ("xvsqrtsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
631 
test_xvtdivdp(void)632 static void test_xvtdivdp(void)
633 {
634    __asm__ __volatile__ ("xvtdivdp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
635 }
636 
test_xvtdivsp(void)637 static void test_xvtdivsp(void)
638 {
639    __asm__ __volatile__ ("xvtdivsp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
640 }
641 
/* Execute xscvdpsp with vec_inB as the source; the result is written to vec_out. */
static void test_xscvdpsp(void)
{
   __asm__ __volatile__ ("xscvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
646 
/* Execute xscvdpuxws with vec_inB as the source; the result is written to vec_out. */
static void test_xscvdpuxws(void)
{
   __asm__ __volatile__ ("xscvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
651 
/* Execute xscvspdp with vec_inB as the source; the result is written to vec_out. */
static void test_xscvspdp(void)
{
   __asm__ __volatile__ ("xscvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
656 
/* Execute xvcvdpsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvdpsp(void)
{
   __asm__ __volatile__ ("xvcvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
661 
/* Execute xvcvdpuxds with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvdpuxds(void)
{
   __asm__ __volatile__ ("xvcvdpuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
666 
/* Execute xvcvdpuxws with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvdpuxws(void)
{
   __asm__ __volatile__ ("xvcvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
671 
/* Execute xvcvspdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvspdp(void)
{
   __asm__ __volatile__ ("xvcvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
676 
/* Execute xvcvspsxds with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvspsxds(void)
{
   __asm__ __volatile__ ("xvcvspsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
681 
/* Execute xvcvspuxds with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvspuxds(void)
{
   __asm__ __volatile__ ("xvcvspuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
686 
/* Execute xvcvdpsxds with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvdpsxds(void)
{
   __asm__ __volatile__ ("xvcvdpsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
691 
/* Execute xvcvspuxws with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvspuxws(void)
{
   __asm__ __volatile__ ("xvcvspuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
696 
/* Execute xvcvsxddp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvsxddp(void)
{
   __asm__ __volatile__ ("xvcvsxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
701 
/* Execute xvcvuxddp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvuxddp(void)
{
   __asm__ __volatile__ ("xvcvuxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
706 
/* Execute xvcvsxdsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvsxdsp(void)
{
   __asm__ __volatile__ ("xvcvsxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
711 
/* Execute xvcvuxdsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvuxdsp(void)
{
   __asm__ __volatile__ ("xvcvuxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
716 
/* Execute xvcvsxwdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvsxwdp(void)
{
   __asm__ __volatile__ ("xvcvsxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
721 
/* Execute xvcvuxwdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvuxwdp(void)
{
   __asm__ __volatile__ ("xvcvuxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
726 
/* Execute xvcvsxwsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvsxwsp(void)
{
   __asm__ __volatile__ ("xvcvsxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
731 
/* Execute xvcvuxwsp with vec_inB as the source; the result is written to vec_out. */
static void test_xvcvuxwsp(void)
{
   __asm__ __volatile__ ("xvcvuxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
736 
/* Execute xsrdpic with vec_inB as the source; the result is written to vec_out. */
static void test_xsrdpic(void)
{
   __asm__ __volatile__ ("xsrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
741 
/* Execute xsrdpiz with vec_inB as the source; the result is written to vec_out. */
static void test_xsrdpiz(void)
{
   __asm__ __volatile__ ("xsrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
746 
/* Execute xsrdpi with vec_inB as the source; the result is written to vec_out. */
static void test_xsrdpi(void)
{
   __asm__ __volatile__ ("xsrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
751 
/* Execute xvabsdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvabsdp(void)
{
   __asm__ __volatile__ ("xvabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
756 
/* Execute xvnabsdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvnabsdp(void)
{
   __asm__ __volatile__ ("xvnabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
761 
/* Execute xvnegdp with vec_inB as the source; the result is written to vec_out. */
static void test_xvnegdp(void)
{
   __asm__ __volatile__ ("xvnegdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
766 
/* Execute xvabssp with vec_inB as the source; the result is written to vec_out. */
static void test_xvabssp(void)
{
   __asm__ __volatile__ ("xvabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
771 
/* Execute xvnabssp with vec_inB as the source; the result is written to vec_out. */
static void test_xvnabssp(void)
{
   __asm__ __volatile__ ("xvnabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
776 
/* Execute xvrdpi with vec_inB as the source; the result is written to vec_out. */
static void test_xvrdpi(void)
{
   __asm__ __volatile__ ("xvrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
781 
/* Execute xvrdpic with vec_inB as the source; the result is written to vec_out. */
static void test_xvrdpic(void)
{
   __asm__ __volatile__ ("xvrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
786 
/* Execute xvrdpim with vec_inB as the source; the result is written to vec_out. */
static void test_xvrdpim(void)
{
   __asm__ __volatile__ ("xvrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
791 
/* Execute xvrdpip with vec_inB as the source; the result is written to vec_out. */
static void test_xvrdpip(void)
{
   __asm__ __volatile__ ("xvrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
796 
/* Execute xvrdpiz with vec_inB as the source; the result is written to vec_out. */
static void test_xvrdpiz(void)
{
   __asm__ __volatile__ ("xvrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
801 
/* Execute xvrspi with vec_inB as the source; the result is written to vec_out. */
static void test_xvrspi(void)
{
   __asm__ __volatile__ ("xvrspi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
806 
/* Execute xvrspic with vec_inB as the source; the result is written to vec_out. */
static void test_xvrspic(void)
{
   __asm__ __volatile__ ("xvrspic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
811 
/* Execute xvrspim with vec_inB as the source; the result is written to vec_out. */
static void test_xvrspim(void)
{
   __asm__ __volatile__ ("xvrspim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
816 
/* Execute xvrspip with vec_inB as the source; the result is written to vec_out. */
static void test_xvrspip(void)
{
   __asm__ __volatile__ ("xvrspip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
821 
/* Execute xvrspiz with vec_inB as the source; the result is written to vec_out. */
static void test_xvrspiz(void)
{
   __asm__ __volatile__ ("xvrspiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
826 
/* Table of the single-operand VSX tests.  Each entry gives the wrapper
 * function, the mnemonic, no operand-pair list (NULL), the number of test
 * cases, the input precision, the category tag and a short operation label.
 * The table ends with an all-NULL/zero sentinel entry.
 * NOTE(review): the estimate and sqrt entries use 18 test cases while all
 * others use 20; the reason is not visible in this part of the file.
 */
static vx_fp_test_t
vsx_one_fp_arg_tests[] = {
                                { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                                { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                                { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                                { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                                { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                                { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
                                { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
                                { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                                { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
                                { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                                { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                                { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                                { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                                { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
                                { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
                                { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
                                { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
                                { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
                                { &test_xvrdpi,  "xvrdpi",  NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                                { &test_xvrspi,  "xvrspi",  NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                                { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                                { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                                { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                                { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                                { NULL, NULL, NULL, 0, 0, 0, NULL}
};
867 
868 static vx_fp_test_t
869 vx_tdivORtsqrt_tests[] = {
870                           { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
871                           { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
872                           { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
873                           { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
874                           { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
875                           { NULL, NULL, NULL, 0 , 0, 0, NULL}
876 };
877 
/*
 * 64-bit integer operands used by the doubleword entries of intToFp_tests
 * (integer-to-floating-point conversion tests).
 */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};
887 
/*
 * 32-bit integer operands used by the word entries of intToFp_tests
 * (integer-to-floating-point conversion tests).
 */
static unsigned int singleWord[] = {
   0x00000000U,
   0xffff0000U,
   0x0000ffffU,
   0xffffffffU,
   0x89a73522U,
   0x01020304U,
   0x0000abcdU,
   0x11223344U
};
897 
898 typedef struct vx_intToFp_test
899 {
900    test_func_t test_func;
901    const char * name;
902    void * targs;
903    int num_tests;
904    precision_type_t precision;
905    vx_fp_test_type type;
906 } vx_intToFp_test_t;
907 
908 static vx_intToFp_test_t
909 intToFp_tests[] = {
910                    { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
911                    { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
912                    { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
913                    { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
914                    { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
915                    { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
916                    { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
917                    { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
918                    { NULL, NULL, NULL, 0, 0 }
919 };
920 
921 static Bool do_OE;
922 typedef enum {
923    DIV_BASE = 1,
924    DIV_OE = 2,
925    DIV_DOT = 4,
926 } div_type_t;
927 /* Possible divde type combinations are:
928  *   - base
929  *   - base+dot
930  *   - base+OE
931  *   - base+OE+dot
932  */
#ifdef __powerpc64__
/*
 * Execute one variant of divdeu with r14 and r15 as source operands and r17
 * as the destination.  The variant (plain, "o", ".", "o.") is chosen from
 * the do_OE and do_dot globals.  The condition register and XER are captured
 * into div_flags and div_xer right after the instruction.
 *
 * The set / instruction / get triple is deliberately repeated inside every
 * case: nothing else may execute between clearing the registers and reading
 * them back, otherwise unrelated instructions could alter the CR.
 */
static void test_divdeu(void)
{
   const int variant = DIV_BASE
                     | (do_OE  ? DIV_OE  : 0)
                     | (do_dot ? DIV_DOT : 0);

   switch (variant) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;

      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;

      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;

      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;

      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
969 
test_divwe(void)970 static void test_divwe(void)
971 {
972    int divwe_type = DIV_BASE;
973    if (do_OE)
974       divwe_type |= DIV_OE;
975    if (do_dot)
976       divwe_type |= DIV_DOT;
977 
978    switch (divwe_type) {
979       case 1:
980         SET_CR_XER_ZERO;
981          __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
982          GET_CR_XER(div_flags, div_xer);
983          break;
984       case 3:
985         SET_CR_XER_ZERO;
986          __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
987          GET_CR_XER(div_flags, div_xer);
988          break;
989       case 5:
990         SET_CR_XER_ZERO;
991          __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
992          GET_CR_XER(div_flags, div_xer);
993          break;
994       case 7:
995         SET_CR_XER_ZERO;
996          __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
997          GET_CR_XER(div_flags, div_xer);
998          break;
999       default:
1000          fprintf(stderr, "Invalid divweu type. Exiting\n");
1001          exit(1);
1002    }
1003 }
1004 
1005 
1006 typedef struct simple_test {
1007    test_func_t test_func;
1008    char * name;
1009    precision_type_t precision;
1010 } simple_test_t;
1011 
1012 
/*
 * Load four single-precision operand pairs into the vector registers'
 * backing storage.
 *
 * targs        - first of four consecutive argument descriptors; each one
 *                holds indexes (fra_idx, frb_idx) into spec_sp_fargs[].
 * swap_inputs  - when True the "B" operands are written to vec_out instead
 *                of vec_inB.
 *
 * Element n of vec_inA receives spec_sp_fargs[targs[n].fra_idx]; element n
 * of the B destination receives spec_sp_fargs[targs[n].frb_idx].
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * dst_a = (char *)&vec_inA;
   char * dst_b = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const fp_test_args_t * pair = &targs[lane];

      /* 4 bytes per single-precision element */
      memcpy(dst_a + lane * 4, &spec_sp_fargs[pair->fra_idx], 4);
      memcpy(dst_b + lane * 4, &spec_sp_fargs[pair->frb_idx], 4);
   }
}
1030 
/*
 * Load two double-precision operand pairs into the vector registers'
 * backing storage.
 *
 * targs        - first of two consecutive argument descriptors; each one
 *                holds indexes (fra_idx, frb_idx) into spec_fargs[].
 * swap_inputs  - when True the "B" operands are written to vec_out instead
 *                of vec_inB.
 *
 * Element n of vec_inA receives spec_fargs[targs[n].fra_idx]; element n of
 * the B destination receives spec_fargs[targs[n].frb_idx].
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * dst_a = (char *)&vec_inA;
   char * dst_b = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int lane;

   for (lane = 0; lane < 2; lane++) {
      const fp_test_args_t * pair = &targs[lane];

      /* 8 bytes per double-precision element */
      memcpy(dst_a + lane * 8, &spec_fargs[pair->fra_idx], 8);
      memcpy(dst_b + lane * 8, &spec_fargs[pair->frb_idx], 8);
   }
}
1048 
1049 #define VX_NOT_CMP_OP 0xffffffff
print_vector_fp_result(unsigned int cc,vx_fp_test_t * test_group,int i,Bool print_vec_out)1050 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1051 {
1052    int a_idx, b_idx, k;
1053    char * name = malloc(20);
1054    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1055    int loops = dp ? 2 : 4;
1056    fp_test_args_t * targs = &test_group->targs[i];
1057    unsigned long long * frA_dp, * frB_dp, * dst_dp;
1058    unsigned int * frA_sp, *frB_sp, * dst_sp;
1059    strcpy(name, test_group->name);
1060    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1061    for (k = 0; k < loops; k++) {
1062       a_idx = targs->fra_idx;
1063       b_idx = targs->frb_idx;
1064       if (k)
1065          printf(" AND ");
1066       if (dp) {
1067          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1068          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1069          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1070       } else {
1071          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1072          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1073          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1074       }
1075       targs++;
1076    }
1077    if (cc != VX_NOT_CMP_OP)
1078       printf(" ? cc=%x", cc);
1079 
1080    if (print_vec_out) {
1081       if (dp) {
1082          dst_dp = (unsigned long long *) &vec_out;
1083          printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1084       } else {
1085          dst_sp = (unsigned int *) &vec_out;
1086          printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1087       }
1088    } else {
1089       printf("\n");
1090    }
1091    free(name);
1092 }
1093 
1094 
1095 
test_vsx_one_fp_arg(void)1096 static void test_vsx_one_fp_arg(void)
1097 {
1098    test_func_t func;
1099    int k;
1100    k = 0;
1101    build_special_fargs_table();
1102 
1103    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1104       int idx, i;
1105       vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1106       Bool estimate = (test_group.type == VX_ESTIMATE);
1107       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1108       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1109       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1110       Bool sparse_sp = False;
1111       int stride = dp ? 2 : 4;
1112       int loops = is_scalar ? 1 : stride;
1113       stride = is_scalar ? 1: stride;
1114 
1115       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1116        *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
1117        *                     or
1118        *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
1119        *
1120        * For the vector op case, we need to adjust stride from '4' to '2', since
1121        * we'll only be loading two values per loop into the input register.
1122        */
1123       if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1124          sparse_sp = True;
1125          stride = 2;
1126       }
1127 
1128       for (i = 0; i < test_group.num_tests; i+=stride) {
1129          unsigned int * pv;
1130          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1131 
1132          pv = (unsigned int *)&vec_out;
1133          // clear vec_out
1134          for (idx = 0; idx < 4; idx++, pv++)
1135             *pv = 0;
1136 
1137          if (dp) {
1138             int j;
1139             unsigned long long * frB_dp, *dst_dp;
1140             for (j = 0; j < loops; j++) {
1141                inB = (void *)&spec_fargs[i + j];
1142                // copy double precision FP into vector element i
1143                if (isLE && is_scalar)
1144                   vecB_void_ptr += 8;
1145                memcpy(vecB_void_ptr + (j * 8), inB, 8);
1146             }
1147             // execute test insn
1148             (*func)();
1149             dst_dp = (unsigned long long *) &vec_out;
1150             if (isLE && is_scalar)
1151                dst_dp++;
1152             printf("#%d: %s ", i/stride, test_group.name);
1153             for (j = 0; j < loops; j++) {
1154                if (j)
1155                   printf("; ");
1156                frB_dp = (unsigned long long *)&spec_fargs[i + j];
1157                printf("%s(%016llx)", test_group.op, *frB_dp);
1158                if (estimate) {
1159                   Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1160                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1161                   /* For debugging . . .
1162                    printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1163                    */
1164                } else {
1165                   vx_fp_test_type type = test_group.type;
1166                   switch (type) {
1167                      case VX_SCALAR_CONV_TO_WORD:
1168                         printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1169                         break;
1170                      case VX_CONV_TO_SINGLE:
1171                         printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1172                         break;
1173                      default:  // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1174                         printf(" = %016llx", dst_dp[j]);
1175                   }
1176                }
1177             }
1178             printf("\n");
1179          } else {
1180             int j;
1181             unsigned int * frB_sp, * dst_sp = NULL;
1182             unsigned long long * dst_dp = NULL;
1183             if (sparse_sp)
1184                loops = 2;
1185             for (j = 0; j < loops; j++) {
1186                inB = (void *)&spec_sp_fargs[i + j];
1187                // copy single precision FP into vector element i
1188                if (sparse_sp) {
1189                   if (isLE)
1190                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1191                   else
1192                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1193                } else {
1194                   if (isLE && is_scalar)
1195                      vecB_void_ptr += 12;
1196                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1197                }
1198             }
1199             // execute test insn
1200             (*func)();
1201             if (test_group.type == VX_CONV_TO_DOUBLE) {
1202                dst_dp = (unsigned long long *) &vec_out;
1203                if (isLE && is_scalar)
1204                   dst_dp++;
1205             } else {
1206                dst_sp = (unsigned int *) &vec_out;
1207                if (isLE && is_scalar)
1208                   dst_sp += 3;
1209             }
1210             // print result
1211             printf("#%d: %s ", i/stride, test_group.name);
1212             for (j = 0; j < loops; j++) {
1213                if (j)
1214                   printf("; ");
1215                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1216                printf("%s(%08x)", test_group.op, *frB_sp);
1217                if (estimate) {
1218                   Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1219                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1220                } else {
1221                   if (test_group.type == VX_CONV_TO_DOUBLE)
1222                         printf(" = %016llx", dst_dp[j]);
1223                   else
1224                   /* Special case: Current VEX implementation for fsqrts (single precision)
1225                    * uses the same implementation as that used for double precision fsqrt.
1226                    * However, I've found that for xvsqrtsp, the result from that implementation
1227                    * may be off by the two LSBs.  Generally, even this small inaccuracy can cause the
1228                    * output to appear very different if you end up with a carry.  But for the given
1229                    * inputs in this testcase, we can simply mask out these bits.
1230                    */
1231                      printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1232                }
1233             }
1234             printf("\n");
1235          }
1236       }
1237       k++;
1238       printf( "\n" );
1239    }
1240 }
1241 
test_int_to_fp_convert(void)1242 static void test_int_to_fp_convert(void)
1243 {
1244    test_func_t func;
1245    int k;
1246    k = 0;
1247 
1248    while ((func = intToFp_tests[k].test_func)) {
1249       int idx, i;
1250       vx_intToFp_test_t test_group = intToFp_tests[k];
1251       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1252       Bool sparse_sp = False;
1253       int stride = dp ? 2 : 4;
1254       int loops = stride;
1255 
1256       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1257        *    |___ int___|_Unused_|___int___|__Unused__|   // for vector op
1258        *                     or
1259        * We need to adjust stride from '4' to '2', since we'll only be loading
1260        * two values per loop into the input register.
1261        */
1262       if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1263          sparse_sp = True;
1264          stride = 2;
1265       }
1266 
1267       for (i = 0; i < test_group.num_tests; i+=stride) {
1268          unsigned int * pv;
1269          void * inB;
1270 
1271          pv = (unsigned int *)&vec_out;
1272          // clear vec_out
1273          for (idx = 0; idx < 4; idx++, pv++)
1274             *pv = 0;
1275 
1276          if (dp) {
1277             int j;
1278             unsigned long long  *dst_dw, * targs = test_group.targs;
1279             for (j = 0; j < loops; j++) {
1280                inB = (void *)&targs[i + j];
1281                // copy doubleword into vector element i
1282                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1283             }
1284             // execute test insn
1285             (*func)();
1286             dst_dw = (unsigned long long *) &vec_out;
1287             printf("#%d: %s ", i/stride, test_group.name);
1288             for (j = 0; j < loops; j++) {
1289                if (j)
1290                   printf("; ");
1291                printf("conv(%016llx)", targs[i + j]);
1292 
1293                if (test_group.type == VX_CONV_TO_SINGLE)
1294                   printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1295                else
1296                   printf(" = %016llx", dst_dw[j]);
1297             }
1298             printf("\n");
1299          } else {
1300             int j;
1301             unsigned int * dst_sp = NULL;
1302             unsigned int * targs = test_group.targs;
1303             unsigned long long * dst_dp = NULL;
1304             void * vecB_void_ptr = (void *)&vec_inB;
1305             if (sparse_sp)
1306                loops = 2;
1307             for (j = 0; j < loops; j++) {
1308                inB = (void *)&targs[i + j];
1309                // copy single word into vector element i
1310                if (sparse_sp) {
1311                   if (isLE)
1312                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1313                   else
1314                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1315                } else {
1316                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1317                }
1318             }
1319             // execute test insn
1320             (*func)();
1321             if (test_group.type == VX_CONV_TO_DOUBLE)
1322                dst_dp = (unsigned long long *) &vec_out;
1323             else
1324                dst_sp = (unsigned int *) &vec_out;
1325             // print result
1326             printf("#%d: %s ", i/stride, test_group.name);
1327             for (j = 0; j < loops; j++) {
1328                if (j)
1329                   printf("; ");
1330                printf("conv(%08x)", targs[i + j]);
1331                if (test_group.type == VX_CONV_TO_DOUBLE)
1332                   printf(" = %016llx", dst_dp[j]);
1333                else
1334                   printf(" = %08x", dst_sp[j]);
1335             }
1336             printf("\n");
1337          }
1338       }
1339       k++;
1340       printf( "\n" );
1341    }
1342 }
1343 
1344 
1345 
// The div doubleword test data: one { r14, r15 } operand pair per row.
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of operand pairs (rows) in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1363 
// The div word test data: one { r14, r15 } operand pair per row.
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
// Number of operand pairs (rows) in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1374 
1375 typedef struct div_ext_test
1376 {
1377    test_func_t test_func;
1378    const char *name;
1379    int num_tests;
1380    div_type_t div_type;
1381    precision_type_t precision;
1382 } div_ext_test_t;
1383 
1384 static div_ext_test_t div_tests[] = {
1385 #ifdef __powerpc64__
1386                                    { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1387                                    { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1388 #endif
1389                                    { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1390                                    { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1391                                    { NULL, NULL, 0, 0, 0 }
1392 };
1393 
test_div_extensions(void)1394 static void test_div_extensions(void)
1395 {
1396    test_func_t func;
1397    int k;
1398    k = 0;
1399 
1400    while ((func = div_tests[k].test_func)) {
1401       int i, repeat = 1;
1402       div_ext_test_t test_group = div_tests[k];
1403       do_dot = False;
1404 
1405 again:
1406       for (i = 0; i < test_group.num_tests; i++) {
1407          unsigned int condreg;
1408 
1409          if (test_group.div_type == DIV_OE)
1410             do_OE = True;
1411          else
1412             do_OE = False;
1413 
1414          if (test_group.precision == DOUBLE_TEST) {
1415             r14 = div_dw_tdata[i][0];
1416             r15 = div_dw_tdata[i][1];
1417          } else {
1418             r14 = div_w_tdata[i][0];
1419             r15 = div_w_tdata[i][1];
1420          }
1421          // execute test insn
1422          (*func)();
1423          condreg = (div_flags & 0xf0000000) >> 28;
1424          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1425          if (test_group.precision == DOUBLE_TEST) {
1426             printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1427                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1428          } else {
1429             printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1430                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1431          }
1432          printf(" CR=%x; XER=%x\n", condreg, div_xer);
1433       }
1434       printf("\n");
1435       if (repeat) {
1436          repeat = 0;
1437          do_dot = True;
1438          goto again;
1439       }
1440       k++;
1441       printf( "\n" );
1442    }
1443 }
1444 
1445 
test_vx_tdivORtsqrt(void)1446 static void test_vx_tdivORtsqrt(void)
1447 {
1448    test_func_t func;
1449    int k, crx;
1450    unsigned int flags;
1451    k = 0;
1452    do_dot = False;
1453    build_special_fargs_table();
1454 
1455    while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1456       int idx, i;
1457       vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1458       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1459       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1460       Bool two_args = test_group.targs ?  True : False;
1461       int stride = dp ? 2 : 4;
1462       int loops = is_scalar ? 1 : stride;
1463       stride = is_scalar ? 1: stride;
1464 
1465       for (i = 0; i < test_group.num_tests; i+=stride) {
1466          unsigned int * pv;
1467          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1468 
1469          pv = (unsigned int *)&vec_out;
1470          // clear vec_out
1471          for (idx = 0; idx < 4; idx++, pv++)
1472             *pv = 0;
1473 
1474          if (dp) {
1475             int j;
1476             unsigned long long * frB_dp;
1477             if (two_args) {
1478                setup_dp_fp_args(&test_group.targs[i], False);
1479             } else {
1480                for (j = 0; j < loops; j++) {
1481                   inB = (void *)&spec_fargs[i + j];
1482                   // copy double precision FP into vector element i
1483                   if (isLE && is_scalar)
1484                      vecB_void_ptr += 8;
1485                   memcpy(vecB_void_ptr + (j * 8), inB, 8);
1486                }
1487             }
1488             // execute test insn
1489             // Must do set/get of CRs immediately before/after calling the asm func
1490             // to avoid CRs being modified by other instructions.
1491             SET_FPSCR_ZERO;
1492             SET_CR_XER_ZERO;
1493             (*func)();
1494             GET_CR(flags);
1495             // assumes using CR1
1496             crx = (flags & 0x0f000000) >> 24;
1497             if (two_args) {
1498                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1499             } else {
1500                printf("#%d: %s ", i/stride, test_group.name);
1501                for (j = 0; j < loops; j++) {
1502                   if (j)
1503                      printf("; ");
1504                   frB_dp = (unsigned long long *)&spec_fargs[i + j];
1505                   printf("%s(%016llx)", test_group.op, *frB_dp);
1506                }
1507                printf( " ? %x (CRx)\n", crx);
1508             }
1509          } else {
1510             int j;
1511             unsigned int * frB_sp;
1512             if (two_args) {
1513                setup_sp_fp_args(&test_group.targs[i], False);
1514             } else {
1515                for (j = 0; j < loops; j++) {
1516                   inB = (void *)&spec_sp_fargs[i + j];
1517                   // copy single precision FP into vector element i
1518                   memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1519                }
1520             }
1521             // execute test insn
1522             SET_FPSCR_ZERO;
1523             SET_CR_XER_ZERO;
1524             (*func)();
1525             GET_CR(flags);
1526             crx = (flags & 0x0f000000) >> 24;
1527             // print result
1528             if (two_args) {
1529                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1530             } else {
1531                printf("#%d: %s ", i/stride, test_group.name);
1532                for (j = 0; j < loops; j++) {
1533                   if (j)
1534                      printf("; ");
1535                   frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1536                   printf("%s(%08x)", test_group.op, *frB_sp);
1537                }
1538                printf( " ? %x (CRx)\n", crx);
1539             }
1540          }
1541       }
1542       k++;
1543       printf( "\n" );
1544    }
1545 }
1546 
1547 
test_ftsqrt(void)1548 static void test_ftsqrt(void)
1549 {
1550    int i, crx;
1551    unsigned int flags;
1552    unsigned long long * frbp;
1553    build_special_fargs_table();
1554 
1555 
1556    for (i = 0; i < nb_special_fargs; i++) {
1557       f14 = spec_fargs[i];
1558       frbp = (unsigned long long *)&spec_fargs[i];
1559       SET_FPSCR_ZERO;
1560       SET_CR_XER_ZERO;
1561       __asm__ __volatile__ ("ftsqrt           cr1, %0" : : "d" (f14));
1562       GET_CR(flags);
1563       crx = (flags & 0x0f000000) >> 24;
1564       printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1565    }
1566    printf( "\n" );
1567 }
1568 
1569 static void
test_popcntw(void)1570 test_popcntw(void)
1571 {
1572 #ifdef __powerpc64__
1573    uint64_t res;
1574    unsigned long long src = 0x9182736405504536ULL;
1575    r14 = src;
1576    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1577    printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1578 #else
1579    uint32_t res;
1580    unsigned int src = 0x9182730E;
1581    r14 = src;
1582    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1583    printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1584 #endif
1585    printf( "\n" );
1586 }
1587 
1588 
1589 static test_table_t
1590          all_tests[] =
1591 {
1592 
1593                     { &test_vsx_one_fp_arg,
1594                       "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
1595                     { &test_int_to_fp_convert,
1596                       "Test VSX vector integer to float conversion instructions", OTHER_INST },
1597                     { &test_div_extensions,
1598 		      "Test div extensions", SCALAR_DIV_INST },
1599                     { &test_ftsqrt,
1600 		      "Test ftsqrt instruction", OTHER_INST },
1601                     { &test_vx_tdivORtsqrt,
1602 		      "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
1603                     { &test_popcntw,
1604 		      "Test popcntw instruction", OTHER_INST },
1605                     { NULL, NULL }
1606 };
1607 #endif // HAS_VSX
1608 
/* Write the command-line help text to stderr. */
static void usage (void)
{
  static const char help_text[] =
	  "Usage: test_isa_3_0 [OPTIONS]\n"
	  "\t-d: test scalar division instructions (default)\n"
	  "\t-o: test non scalar division instructions (default)\n"
	  "\t-A: test all instructions (default)\n"
	  "\t-h: display this help and exit\n";

  fputs(help_text, stderr);
}
1619 
/*
 * Program entry point.
 *
 * Options:
 *   -d  select the scalar division tests (SCALAR_DIV_INST)
 *   -o  select all other tests (OTHER_INST)
 *   -A  select every test
 *   -h  print the help text and exit with status 0
 *
 * When no selection option is given, every test group is run, as the help
 * text documents.  An unrecognised option prints the help text plus a
 * diagnostic and exits with status 1.
 *
 * Without HAS_VSX the program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i = 0;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER. These
    * bits are set by various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' here; the offending character is in optopt. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   /* No group selected: run everything, matching the documented default. */
   if (test_run_mask == 0)
      test_run_mask = 0xFFFF;

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS _VSX

   return 0;
}
1680