1 /*  Copyright (C) 2011 IBM
2 
3  Author: Maynard Johnson <maynardj@us.ibm.com>
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License as
7  published by the Free Software Foundation; either version 2 of the
8  License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18  02111-1307, USA.
19 
20  The GNU General Public License is contained in the file COPYING.
21  */
22 
23 #ifdef HAS_VSX
24 
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <malloc.h>
30 #include <altivec.h>
31 #include <math.h>
32 
/* HWord_t is a 64-bit unsigned integer on powerpc64 builds and a 32-bit
 * unsigned integer everywhere else. */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 when VGP_ppc64le_linux is defined, 0 otherwise. */
#if defined(VGP_ppc64le_linux)
#  define isLE 1
#else
#  define isLE 0
#endif

/* Minimal boolean type and its two values. */
typedef unsigned char Bool;
#define False 0
#define True 1
/* Global register variables: each identifier is bound to the PowerPC register
 * named in its __asm__ label (integer registers r14-r17, floating-point
 * registers fr14-fr17).  They are not referenced in this part of the file. */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
56 
/* Volatile scratch variables; they are not referenced in this part of the
 * file.  NOTE(review): presumably they receive the CR and XER contents
 * captured with the macros below -- confirm at the use sites. */
static volatile unsigned int div_flags, div_xer;

/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg into the condition register (mtcr).  Note that the expansion
 * already ends with a semicolon. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg into XER (mtxer).  The expansion already ends with a
 * semicolon. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval (mfxer). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Clear the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Clear XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Clear CR and then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF and a source register holding 0.0. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
89 
90 
/* Pointer to a test routine that takes no arguments and returns nothing. */
typedef void (*test_func_t)(void);
/* Short name for struct test_table, whose members are defined further down. */
typedef struct test_table test_table_t;
93 
94 
/* The functions below, which construct a table of floating point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
98 
/* AB_DPRINTF: printf-style debug trace written to stderr.  It is compiled in
 * only when DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty
 * statement. */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit pattern from a sign bit, an exponent field and a
 * fraction field, and store it at farg.
 *
 *   farg - destination; must have room for 8 bytes (callers in this file
 *          pass the address of a double)
 *   s    - sign, placed at bit 63
 *   _exp - exponent field, placed at bit 52
 *   mant - fraction field, OR-ed into the low bits unchanged
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Copy the bytes instead of storing through a uint64_t pointer: the
    * destination is a double, and writing it through an incompatible
    * pointer type breaks the strict-aliasing rule. */
   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* Casts make the arguments match the %x / %llx conversions even
       * where uint64_t is not unsigned long long. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, (unsigned int)_exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
115 
/* Assemble a 32-bit pattern from a sign bit, an exponent field and a
 * fraction field, and store it at farg.
 *
 *   farg - destination; must have room for 4 bytes (callers in this file
 *          pass the address of a float)
 *   s    - sign, placed at bit 31
 *   _exp - exponent field, placed at bit 23
 *   mant - fraction field, OR-ed into the low bits unchanged
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   const uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   /* Byte copy instead of a store through a uint32_t pointer, so that the
    * float destination is not accessed through an incompatible type. */
   memcpy(farg, &bits, sizeof bits);
}
123 
124 
/* A pair of operand indices for a two-operand test.
 * NOTE(review): presumably both are indices into the special-value tables
 * filled by build_special_fargs_table() -- confirm against the code that
 * consumes this table. */
typedef struct fp_test_args {
   int fra_idx;
   int frb_idx;
} fp_test_args_t;


/* Operand pairs for the two-operand tests: 68 entries, kept in their
 * original order. */
fp_test_args_t two_arg_fp_tests[] = {
   { 8,  8}, { 8, 14}, {15, 16}, { 8,  5}, { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   {14,  8}, {14, 14}, {14,  6}, {14,  5}, {14,  4}, {14,  7}, {14,  9}, {14, 11},
   { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5}, { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5}, { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5}, { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5}, { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   {10,  8}, {10, 14}, {12,  6}, {12,  5}, {10,  4}, {10,  7}, {10,  9}, {10, 11},
   {12,  8}, {12, 14}, {12,  6}, {15, 16}, {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
201 
202 
/* Number of entries stored in spec_fargs / spec_sp_fargs. */
static int nb_special_fargs;
/* Double-precision special values (heap allocated on first use). */
static double * spec_fargs;
/* Single-precision counterparts of spec_fargs, same indexing. */
static float * spec_sp_fargs;

/* Allocate and fill spec_fargs and spec_sp_fargs, and set nb_special_fargs.
 *
 * The call is a no-op when spec_fargs is already set.  Each double is built
 * with register_farg() from the table below.  The single-precision entry is
 * the double converted to float, except for the two SNaN entries, whose
 * single-precision bit pattern is written directly with register_sp_farg().
 *
 * If either allocation fails, a message is printed to stderr and the
 * program exits with status 1.
 */
static void build_special_fargs_table(void)
{
/*
  Entry  Sign Exp   fraction                  Special value
   0      0   3fd   0x8000000000000ULL         Positive finite number
   1      0   404   0xf000000000000ULL         ...
   2      0   001   0x8000000b77501ULL         ...
   3      0   7fe   0x800000000051bULL         ...
   4      0   012   0x3214569900000ULL         ...
   5      0   000   0x0000000000000ULL         +0.0 (+zero)
   6      1   000   0x0000000000000ULL         -0.0 (-zero)
   7      0   7ff   0x0000000000000ULL         +infinity
   8      1   7ff   0x0000000000000ULL         -infinity
   9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
   10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
   11     0   7ff   0x8000000000000ULL         +QNaN
   12     1   7ff   0x8000000000000ULL         -QNaN
   13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
   14     1   40d   0x0650f5a07b353ULL         Negative finite number
   15     0   412   0x32585a9900000ULL         A few more positive finite numbers
   16     0   413   0x82511a2000000ULL         ...
   17     0   403   0x12ef5a9300000ULL         ...
   18     0   405   0x14bf5d2300000ULL         ...
   19     0   409   0x76bf982440000ULL         ...
*/
   static const struct {
      int      s;        /* sign bit */
      uint16_t _exp;     /* double-precision exponent field */
      uint64_t mant;     /* double-precision fraction field */
      int      is_snan;  /* non-zero: write the SP bit pattern by hand */
   } values[] = {
      { 0, 0x3fd, 0x8000000000000ULL, 0 },   /* #0 */
      { 0, 0x404, 0xf000000000000ULL, 0 },   /* #1 */
      { 0, 0x001, 0x8000000b77501ULL, 0 },   /* #2 */
      { 0, 0x7fe, 0x800000000051bULL, 0 },   /* #3 */
      { 0, 0x012, 0x3214569900000ULL, 0 },   /* #4 */
      { 0, 0x000, 0x0000000000000ULL, 0 },   /* #5  +0.0 */
      { 1, 0x000, 0x0000000000000ULL, 0 },   /* #6  -0.0 */
      { 0, 0x7FF, 0x0000000000000ULL, 0 },   /* #7  +infinity */
      { 1, 0x7FF, 0x0000000000000ULL, 0 },   /* #8  -infinity */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL, 1 },   /* #9  +SNaN */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL, 1 },   /* #10 -SNaN */
      { 0, 0x7FF, 0x8000000000000ULL, 0 },   /* #11 +QNaN */
      { 1, 0x7FF, 0x8000000000000ULL, 0 },   /* #12 -QNaN */
      { 1, 0x000, 0x8340000078000ULL, 0 },   /* #13 denormalized value */
      { 1, 0x40d, 0x0650f5a07b353ULL, 0 },   /* #14 negative finite number */
      { 0, 0x412, 0x32585a9900000ULL, 0 },   /* #15 */
      { 0, 0x413, 0x82511a2000000ULL, 0 },   /* #16 */
      { 0, 0x403, 0x12ef5a9300000ULL, 0 },   /* #17 */
      { 0, 0x405, 0x14bf5d2300000ULL, 0 },   /* #18 */
      { 0, 0x409, 0x76bf982440000ULL, 0 }    /* #19 */
   };
   enum { NUM_VALUES = sizeof values / sizeof values[0] };
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc( NUM_VALUES * sizeof *spec_fargs );
   spec_sp_fargs = malloc( NUM_VALUES * sizeof *spec_sp_fargs );
   if (!spec_fargs || !spec_sp_fargs) {
      fprintf(stderr, "build_special_fargs_table: out of memory\n");
      exit(1);
   }

   for (i = 0; i < NUM_VALUES; i++) {
      register_farg(&spec_fargs[i], values[i].s, values[i]._exp, values[i].mant);

      if (values[i].is_snan) {
         /*
          * When src is a SNaN, it's converted to a QNaN first before rounding
          * to single-precision, so we can't just copy the double-precision
          * value to the corresponding single-precision slot.  Instead, we
          * have to manually set the bits using register_sp_farg().
          */
         register_sp_farg(&spec_sp_fargs[i], values[i].s, 0xff, 0x3FFFFF);
      } else {
         spec_sp_fargs[i] = spec_fargs[i];
      }
   }

   nb_special_fargs = NUM_VALUES;
}
398 
399 
/* One table entry pairing a test routine with a name. */
struct test_table
{
   test_func_t test_category;   /* routine to call */
   char * name;                 /* name associated with that routine */
};
405 
/* Precision of the input operands of a floating point test. */
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Category of a VSX floating point test, stored in vx_fp_test_t.type. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,
   VX_DEFAULT             = 4
} vx_fp_test_type;
419 
/* Vectors shared with the test_* routines: their inline asm reads vec_inA
 * and vec_inB as source operands and writes its result to vec_out. */
static vector unsigned int vec_out, vec_inA, vec_inB;
421 
422 /* This function is for checking the reciprocal and reciprocal square root
423  * estimate instructions.
424  */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)425 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
426 {
427    /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
428     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
429     * does an actual reciprocal calculation versus estimation, so the answer we get back from
430     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
431     * precision) and the estimate may still be within expected tolerances.  On top of that,
432     * we can't count on these estimates always being the same across implementations.
433     * For example, with the fre[s] instruction (which should be correct to within one part
434     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
435     * one implementation could return 1.0111_1111_0000 and another implementation could return
436     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
437     * single bit in common.
438     *
439     * The upshot is we can't validate the VEX output for these instructions by comparing against
440     * stored bit patterns.  We must check that the result is within expected tolerances.
441     */
442 
443 
444    /* A mask to be used for validation as a last resort.
445     * Only use 12 bits of precision for reasons discussed above.
446     */
447 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
448 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
449 
450    Bool result = False;
451    Bool dp_test = type == DOUBLE_TEST;
452    double src_dp, res_dp;
453    float src_sp, res_sp;
454    src_dp = res_dp = 0;
455    src_sp = res_sp = 0;
456 #define SRC (dp_test ? src_dp : src_sp)
457 #define RES (dp_test ? res_dp : res_sp)
458    Bool src_is_negative = False;
459    Bool res_is_negative = False;
460    unsigned long long * dst_dp = NULL;
461    unsigned int * dst_sp = NULL;
462    if (dp_test) {
463       unsigned long long * src_dp_ull;
464       dst_dp = (unsigned long long *) &vec_out;
465       src_dp = spec_fargs[idx];
466       src_dp_ull = (unsigned long long *) &src_dp;
467       src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
468       res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
469       memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
470    } else {
471       unsigned int * src_sp_uint;
472       dst_sp = (unsigned int *) &vec_out;
473       src_sp = spec_sp_fargs[idx];
474       src_sp_uint = (unsigned int *) &src_sp;
475       src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
476       res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
477       memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
478    }
479 
480    // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
481    if (isnan(SRC))
482       return isnan(RES);
483    if (fpclassify(SRC) == FP_ZERO)
484       return isinf(RES);
485    if (!src_is_negative && isinf(SRC))
486       return !res_is_negative && (fpclassify(RES) == FP_ZERO);
487    if (is_rsqrte) {
488       if (src_is_negative)
489          return isnan(RES);
490    } else {
491       if (src_is_negative && isinf(SRC))
492          return res_is_negative && (fpclassify(RES) == FP_ZERO);
493    }
494    if (dp_test) {
495       double calc_diff;
496       double real_diff;
497       double recip_divisor;
498       double div_result;
499       double calc_diff_tmp;
500 
501       if (is_rsqrte)
502          recip_divisor = sqrt(src_dp);
503       else
504          recip_divisor = src_dp;
505 
506       div_result = 1.0/recip_divisor;
507       calc_diff_tmp = recip_divisor * 16384.0;
508       if (isnormal(calc_diff_tmp)) {
509          calc_diff = fabs(1.0/calc_diff_tmp);
510          real_diff = fabs(res_dp - div_result);
511          result = ( ( res_dp == div_result )
512                   || ( real_diff <= calc_diff ) );
513       } else {
514          /* Unable to compute theoretical difference, so we fall back to masking out
515           * un-precise bits.
516           */
517          unsigned long long * div_result_dp = (unsigned long long *) &div_result;
518          result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
519       }
520       /* For debug use . . .
521          if (!result) {
522              unsigned long long * dv = &div_result;
523              unsigned long long * rd = &real_diff;
524              unsigned long long * cd = &calc_diff;
525              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
526        *dv, *rd, *cd);
527           }
528        */
529    } else {  // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
530       float calc_diff;
531       float real_diff;
532       float div_result;
533       float calc_diff_tmp;
534       float recip_divisor = sqrt(src_sp);
535 
536       div_result = 1.0/recip_divisor;
537       calc_diff_tmp = recip_divisor * 16384.0;
538       if (isnormal(calc_diff_tmp)) {
539          calc_diff = fabsf(1.0/calc_diff_tmp);
540          real_diff = fabsf(res_sp - div_result);
541          result = ( ( res_sp == div_result )
542                   || ( real_diff <= calc_diff ) );
543       } else {
544          /* Unable to compute theoretical difference, so we fall back to masking out
545           * un-precise bits.
546           */
547          unsigned int * div_result_sp = (unsigned int *) &div_result;
548          result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
549       }
550       /* For debug use . . .
551          if (!result) {
552              unsigned long long * dv = &div_result;
553              unsigned long long * rd = &real_diff;
554              unsigned long long * cd = &calc_diff;
555              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
556        *dv, *rd, *cd);
557           }
558        */
559    }
560    return result;
561 }
562 
/* Description of one VSX floating point instruction test, as used by the
 * test tables in this file. */
typedef struct vx_fp_test
{
   test_func_t test_func;       /* routine that executes the instruction */
   const char * name;           /* instruction mnemonic */
   fp_test_args_t * targs;      /* operand-pair table, or NULL in the one-operand tables */
   int num_tests;               /* number of test cases for this entry */
   precision_type_t precision;  /* SINGLE_TEST or DOUBLE_TEST */
   vx_fp_test_type type;        /* test category */
   const char * op;             /* short operation label, e.g. "1/x" or "sqrt" */
} vx_fp_test_t;
573 
574 
/* Flag that is not referenced in this part of the file.
 * NOTE(review): presumably selects the record ("dot") form of an
 * instruction -- confirm at its use sites. */
static Bool do_dot;
576 
test_xvredp(void)577 static void test_xvredp(void)
578 {
579    __asm__ __volatile__ ("xvredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
580 }
581 
test_xsredp(void)582 static void test_xsredp(void)
583 {
584    __asm__ __volatile__ ("xsredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
585 }
586 
test_xvrsqrtedp(void)587 static void test_xvrsqrtedp(void)
588 {
589    __asm__ __volatile__ ("xvrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
590 }
591 
test_xsrsqrtedp(void)592 static void test_xsrsqrtedp(void)
593 {
594    __asm__ __volatile__ ("xsrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
595 }
596 
test_xvrsqrtesp(void)597 static void test_xvrsqrtesp(void)
598 {
599    __asm__ __volatile__ ("xvrsqrtesp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
600 }
601 
test_xstsqrtdp(void)602 static void test_xstsqrtdp(void)
603 {
604    __asm__ __volatile__ ("xstsqrtdp   cr1, %x0" : : "wa" (vec_inB));
605 }
606 
test_xvtsqrtdp(void)607 static void test_xvtsqrtdp(void)
608 {
609    __asm__ __volatile__ ("xvtsqrtdp   cr1, %x0" : : "wa" (vec_inB));
610 }
611 
test_xvtsqrtsp(void)612 static void test_xvtsqrtsp(void)
613 {
614    __asm__ __volatile__ ("xvtsqrtsp   cr1, %x0" : : "wa" (vec_inB));
615 }
616 
test_xvsqrtdp(void)617 static void test_xvsqrtdp(void)
618 {
619    __asm__ __volatile__ ("xvsqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
620 }
621 
test_xvsqrtsp(void)622 static void test_xvsqrtsp(void)
623 {
624    __asm__ __volatile__ ("xvsqrtsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
625 }
626 
test_xvtdivdp(void)627 static void test_xvtdivdp(void)
628 {
629    __asm__ __volatile__ ("xvtdivdp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
630 }
631 
test_xvtdivsp(void)632 static void test_xvtdivsp(void)
633 {
634    __asm__ __volatile__ ("xvtdivsp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
635 }
636 
test_xscvdpsp(void)637 static void test_xscvdpsp(void)
638 {
639    __asm__ __volatile__ ("xscvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
640 }
641 
test_xscvdpuxws(void)642 static void test_xscvdpuxws(void)
643 {
644    __asm__ __volatile__ ("xscvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
645 }
646 
test_xscvspdp(void)647 static void test_xscvspdp(void)
648 {
649    __asm__ __volatile__ ("xscvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
650 }
651 
test_xvcvdpsp(void)652 static void test_xvcvdpsp(void)
653 {
654    __asm__ __volatile__ ("xvcvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
655 }
656 
test_xvcvdpuxds(void)657 static void test_xvcvdpuxds(void)
658 {
659    __asm__ __volatile__ ("xvcvdpuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
660 }
661 
test_xvcvdpuxws(void)662 static void test_xvcvdpuxws(void)
663 {
664    __asm__ __volatile__ ("xvcvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
665 }
666 
test_xvcvspdp(void)667 static void test_xvcvspdp(void)
668 {
669    __asm__ __volatile__ ("xvcvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
670 }
671 
test_xvcvspsxds(void)672 static void test_xvcvspsxds(void)
673 {
674    __asm__ __volatile__ ("xvcvspsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
675 }
676 
test_xvcvspuxds(void)677 static void test_xvcvspuxds(void)
678 {
679    __asm__ __volatile__ ("xvcvspuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
680 }
681 
test_xvcvdpsxds(void)682 static void test_xvcvdpsxds(void)
683 {
684    __asm__ __volatile__ ("xvcvdpsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
685 }
686 
test_xvcvspuxws(void)687 static void test_xvcvspuxws(void)
688 {
689    __asm__ __volatile__ ("xvcvspuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
690 }
691 
test_xvcvsxddp(void)692 static void test_xvcvsxddp(void)
693 {
694    __asm__ __volatile__ ("xvcvsxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
695 }
696 
test_xvcvuxddp(void)697 static void test_xvcvuxddp(void)
698 {
699    __asm__ __volatile__ ("xvcvuxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
700 }
701 
test_xvcvsxdsp(void)702 static void test_xvcvsxdsp(void)
703 {
704    __asm__ __volatile__ ("xvcvsxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
705 }
706 
test_xvcvuxdsp(void)707 static void test_xvcvuxdsp(void)
708 {
709    __asm__ __volatile__ ("xvcvuxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
710 }
711 
test_xvcvsxwdp(void)712 static void test_xvcvsxwdp(void)
713 {
714    __asm__ __volatile__ ("xvcvsxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
715 }
716 
test_xvcvuxwdp(void)717 static void test_xvcvuxwdp(void)
718 {
719    __asm__ __volatile__ ("xvcvuxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
720 }
721 
test_xvcvsxwsp(void)722 static void test_xvcvsxwsp(void)
723 {
724    __asm__ __volatile__ ("xvcvsxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
725 }
726 
test_xvcvuxwsp(void)727 static void test_xvcvuxwsp(void)
728 {
729    __asm__ __volatile__ ("xvcvuxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
730 }
731 
test_xsrdpic(void)732 static void test_xsrdpic(void)
733 {
734    __asm__ __volatile__ ("xsrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
735 }
736 
test_xsrdpiz(void)737 static void test_xsrdpiz(void)
738 {
739    __asm__ __volatile__ ("xsrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
740 }
741 
test_xsrdpi(void)742 static void test_xsrdpi(void)
743 {
744    __asm__ __volatile__ ("xsrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
745 }
746 
test_xvabsdp(void)747 static void test_xvabsdp(void)
748 {
749    __asm__ __volatile__ ("xvabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
750 }
751 
test_xvnabsdp(void)752 static void test_xvnabsdp(void)
753 {
754    __asm__ __volatile__ ("xvnabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
755 }
756 
test_xvnegdp(void)757 static void test_xvnegdp(void)
758 {
759    __asm__ __volatile__ ("xvnegdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
760 }
761 
test_xvabssp(void)762 static void test_xvabssp(void)
763 {
764    __asm__ __volatile__ ("xvabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
765 }
766 
test_xvnabssp(void)767 static void test_xvnabssp(void)
768 {
769    __asm__ __volatile__ ("xvnabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
770 }
771 
test_xvrdpi(void)772 static void test_xvrdpi(void)
773 {
774    __asm__ __volatile__ ("xvrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
775 }
776 
test_xvrdpic(void)777 static void test_xvrdpic(void)
778 {
779    __asm__ __volatile__ ("xvrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
780 }
781 
test_xvrdpim(void)782 static void test_xvrdpim(void)
783 {
784    __asm__ __volatile__ ("xvrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
785 }
786 
test_xvrdpip(void)787 static void test_xvrdpip(void)
788 {
789    __asm__ __volatile__ ("xvrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
790 }
791 
test_xvrdpiz(void)792 static void test_xvrdpiz(void)
793 {
794    __asm__ __volatile__ ("xvrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
795 }
796 
test_xvrspi(void)797 static void test_xvrspi(void)
798 {
799    __asm__ __volatile__ ("xvrspi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
800 }
801 
test_xvrspic(void)802 static void test_xvrspic(void)
803 {
804    __asm__ __volatile__ ("xvrspic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
805 }
806 
test_xvrspim(void)807 static void test_xvrspim(void)
808 {
809    __asm__ __volatile__ ("xvrspim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
810 }
811 
test_xvrspip(void)812 static void test_xvrspip(void)
813 {
814    __asm__ __volatile__ ("xvrspip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
815 }
816 
test_xvrspiz(void)817 static void test_xvrspiz(void)
818 {
819    __asm__ __volatile__ ("xvrspiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
820 }
821 
822 static vx_fp_test_t
823 vsx_one_fp_arg_tests[] = {
824                                 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
825                                 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
826                                 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
827                                 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
828                                 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
829                                 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
830                                 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
831                                 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
832                                 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
833                                 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
834                                 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
835                                 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
836                                 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
837                                 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
838                                 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
839                                 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
840                                 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
841                                 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
842                                 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
843                                 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
844                                 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
845                                 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
846                                 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
847                                 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
848                                 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
849                                 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
850                                 { &test_xvrdpi,  "xvrdpi",  NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
851                                 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
852                                 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
853                                 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
854                                 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
855                                 { &test_xvrspi,  "xvrspi",  NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
856                                 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
857                                 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
858                                 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
859                                 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
860                                 { NULL, NULL, NULL, 0, 0, 0, NULL}
861 };
862 
/* Table of the VSX "test for software divide / square root" instructions.
 * test_vx_tdivORtsqrt() walks it until it reaches the entry whose test
 * function is NULL.
 *
 * Initializers are positional.  Judging by how the visible code uses each
 * entry, the columns are: test function, instruction name, two-operand
 * argument table (NULL when operands are read directly from the special
 * value tables), number of test values, operand precision, test type and
 * the operation label printed with each result.
 * NOTE(review): the struct definition is outside this section -- confirm the
 * column order against it before reordering anything.
 */
static vx_fp_test_t
vx_tdivORtsqrt_tests[] = {
                          { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
                          { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
                          // end-of-table marker (NULL test function)
                          { NULL, NULL, NULL, 0 , 0, 0, NULL}
};
872 
/* 64-bit integer source values for the doubleword-to-floating-point
 * conversion tests (referenced from intToFp_tests).
 */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};
882 
/* 32-bit integer source values for the word-to-floating-point conversion
 * tests (referenced from intToFp_tests).
 */
static unsigned int singleWord[] = {
   0x00000000u,
   0xffff0000u,
   0x0000ffffu,
   0xffffffffu,
   0x89a73522u,
   0x01020304u,
   0x0000abcdu,
   0x11223344u
};
892 
/* Describes one integer-to-floating-point conversion instruction exercised
 * by test_int_to_fp_convert().  Tables of this type are initialized
 * positionally, so the field order must not change.
 */
typedef struct vx_intToFp_test
{
   test_func_t test_func;        // executes the instruction; NULL marks the end of a table
   const char * name;            // instruction mnemonic printed with each result line
   void * targs;                 // integer source values; element width is chosen by 'precision'
   int num_tests;                // number of source values consumed from targs
   precision_type_t precision;   // DOUBLE_TEST: targs read as unsigned long long, otherwise as unsigned int
   vx_fp_test_type type;         // VX_CONV_TO_DOUBLE or VX_CONV_TO_SINGLE; selects how the result is printed
} vx_intToFp_test_t;
902 
903 static vx_intToFp_test_t
904 intToFp_tests[] = {
905                    { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
906                    { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
907                    { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
908                    { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
909                    { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
910                    { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
911                    { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
912                    { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
913                    { NULL, NULL, NULL, 0, 0 }
914 };
915 
/* Selects the overflow-enabled ("o") form of the divide-extended
 * instructions: test_div_extensions() sets it before each test and
 * test_divdeu()/test_divwe() read it to pick the mnemonic.
 */
static Bool do_OE;
/* Bit flags describing a divide-extended instruction variant; they are
 * OR-ed together to form the value switched on by the divide test functions.
 */
typedef enum {
   DIV_BASE = 1 << 0,   /* always present */
   DIV_OE   = 1 << 1,   /* overflow-enabled ("o") form */
   DIV_DOT  = 1 << 2    /* record (".") form */
} div_type_t;
/* Possible divide-extended (divdeu / divwe) type combinations, with the
 * value each one produces in the switch statements below:
 *   - base         (1)
 *   - base+dot     (5)
 *   - base+OE      (3)
 *   - base+OE+dot  (7)
 */
928 #ifdef __powerpc64__
/* Execute one variant of the divdeu instruction on the global register
 * variables: r14 and r15 are the asm inputs and r17 receives the result.
 * The variant (plain, "o", "." or "o.") is chosen from the do_OE and do_dot
 * globals.  The values obtained by GET_CR_XER right after the instruction
 * are left in div_flags and div_xer for the caller to print.
 *
 * NOTE(review): SET_CR_XER_ZERO and GET_CR_XER are defined outside this
 * section; presumably they clear and then read back CR and XER -- confirm.
 * Each case keeps set / instruction / get adjacent; do not reorder them.
 */
static void test_divdeu(void)
{
   // DIV_BASE is always set, so only the values 1, 3, 5 and 7 can occur.
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case 1:   // DIV_BASE
        SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case 3:   // DIV_BASE | DIV_OE
        SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case 5:   // DIV_BASE | DIV_DOT
        SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case 7:   // DIV_BASE | DIV_OE | DIV_DOT
        SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         // Not reachable with the flag combinations built above.
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
963 #endif
964 
test_divwe(void)965 static void test_divwe(void)
966 {
967    int divwe_type = DIV_BASE;
968    if (do_OE)
969       divwe_type |= DIV_OE;
970    if (do_dot)
971       divwe_type |= DIV_DOT;
972 
973    switch (divwe_type) {
974       case 1:
975         SET_CR_XER_ZERO;
976          __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
977          GET_CR_XER(div_flags, div_xer);
978          break;
979       case 3:
980         SET_CR_XER_ZERO;
981          __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
982          GET_CR_XER(div_flags, div_xer);
983          break;
984       case 5:
985         SET_CR_XER_ZERO;
986          __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
987          GET_CR_XER(div_flags, div_xer);
988          break;
989       case 7:
990         SET_CR_XER_ZERO;
991          __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
992          GET_CR_XER(div_flags, div_xer);
993          break;
994       default:
995          fprintf(stderr, "Invalid divweu type. Exiting\n");
996          exit(1);
997    }
998 }
999 
1000 
/* Minimal test descriptor: a test function, its printable name and the
 * operand precision.  It is not referenced by the code visible in this
 * part of the file.
 */
typedef struct simple_test {
   test_func_t test_func;        // function that executes the test
   char * name;                  // printable name
   precision_type_t precision;   // operand precision of the test
} simple_test_t;
1006 
1007 
/* Load four single-precision operand pairs into the vector inputs.
 *
 * targs       - first of four consecutive argument descriptors; each one
 *               holds indices (fra_idx, frb_idx) into spec_sp_fargs.
 * swap_inputs - when set, the second operand of each pair is written to
 *               vec_out instead of vec_inB.
 *
 * The first operand of pair n is always copied to bytes [4n, 4n+4) of
 * vec_inA.
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * first_operand = (char *)&vec_inA;
   char * second_operand = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int lane;

   for (lane = 0; lane < 4; lane++) {
      int idx_a = targs[lane].fra_idx;
      int idx_b = targs[lane].frb_idx;

      // copy single precision FP into vector element 'lane'
      memcpy(first_operand + (lane * 4), &spec_sp_fargs[idx_a], 4);
      memcpy(second_operand + (lane * 4), &spec_sp_fargs[idx_b], 4);
   }
}
1025 
/* Load two double-precision operand pairs into the vector inputs.
 *
 * targs       - first of two consecutive argument descriptors; each one
 *               holds indices (fra_idx, frb_idx) into spec_fargs.
 * swap_inputs - when set, the second operand of each pair is written to
 *               vec_out instead of vec_inB.
 *
 * The first operand of pair n is always copied to bytes [8n, 8n+8) of
 * vec_inA.
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   char * first_operand = (char *)&vec_inA;
   char * second_operand = swap_inputs ? (char *)&vec_out : (char *)&vec_inB;
   int lane;

   for (lane = 0; lane < 2; lane++) {
      int idx_a = targs[lane].fra_idx;
      int idx_b = targs[lane].frb_idx;

      // copy double precision FP into vector element 'lane'
      memcpy(first_operand + (lane * 8), &spec_fargs[idx_a], 8);
      memcpy(second_operand + (lane * 8), &spec_fargs[idx_b], 8);
   }
}
1043 
1044 #define VX_NOT_CMP_OP 0xffffffff
print_vector_fp_result(unsigned int cc,vx_fp_test_t * test_group,int i,Bool print_vec_out)1045 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1046 {
1047    int a_idx, b_idx, k;
1048    char * name = malloc(20);
1049    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1050    int loops = dp ? 2 : 4;
1051    fp_test_args_t * targs = &test_group->targs[i];
1052    unsigned long long * frA_dp, * frB_dp, * dst_dp;
1053    unsigned int * frA_sp, *frB_sp, * dst_sp;
1054    strcpy(name, test_group->name);
1055    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1056    for (k = 0; k < loops; k++) {
1057       a_idx = targs->fra_idx;
1058       b_idx = targs->frb_idx;
1059       if (k)
1060          printf(" AND ");
1061       if (dp) {
1062          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1063          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1064          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1065       } else {
1066          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1067          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1068          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1069       }
1070       targs++;
1071    }
1072    if (cc != VX_NOT_CMP_OP)
1073       printf(" ? cc=%x", cc);
1074 
1075    if (print_vec_out) {
1076       if (dp) {
1077          dst_dp = (unsigned long long *) &vec_out;
1078          printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1079       } else {
1080          dst_sp = (unsigned int *) &vec_out;
1081          printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1082       }
1083    } else {
1084       printf("\n");
1085    }
1086    free(name);
1087 }
1088 
1089 
1090 
/* Run every single-operand VSX test listed in vsx_one_fp_arg_tests and print
 * one line per execution.
 *
 * For each table entry the special-value inputs (spec_fargs for double
 * precision, spec_sp_fargs for single precision) are copied into vec_inB,
 * the test function is called, and the result is read from vec_out.
 * Instructions whose name contains "xs" are treated as scalar: one input per
 * execution, placed in (and read back from) the last doubleword/word of the
 * vector when isLE is set.  Estimate instructions print PASS/FAIL from
 * check_estimate() instead of the raw result.
 */
static void test_vsx_one_fp_arg(void)
{
   test_func_t func;
   int k;
   k = 0;
   build_special_fargs_table();

   while ((func = vsx_one_fp_arg_tests[k].test_func)) {
      int idx, i;
      vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
      Bool estimate = (test_group.type == VX_ESTIMATE);
      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
      Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
      Bool sparse_sp = False;
      // stride: inputs consumed per execution; loops: inputs loaded/printed
      int stride = dp ? 2 : 4;
      int loops = is_scalar ? 1 : stride;
      stride = is_scalar ? 1: stride;

      /* For conversions of single to double, the 128-bit input register is sparsely populated:
       *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
       *                     or
       *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
       *
       * For the vector op case, we need to adjust stride from '4' to '2', since
       * we'll only be loading two values per loop into the input register.
       */
      if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
         sparse_sp = True;
         stride = 2;
      }

      for (i = 0; i < test_group.num_tests; i+=stride) {
         unsigned int * pv;
         // vecB_void_ptr is re-initialized for every execution
         void * inB, * vecB_void_ptr = (void *)&vec_inB;

         pv = (unsigned int *)&vec_out;
         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            int j;
            unsigned long long * frB_dp, *dst_dp;
            for (j = 0; j < loops; j++) {
               inB = (void *)&spec_fargs[i + j];
               // copy double precision FP into vector element j
               // (scalar ops on LE: the input goes into the second doubleword)
               if (isLE && is_scalar)
                  vecB_void_ptr += 8;
               memcpy(vecB_void_ptr + (j * 8), inB, 8);
            }
            // execute test insn
            (*func)();
            dst_dp = (unsigned long long *) &vec_out;
            // scalar ops on LE: the result is read from the second doubleword
            if (isLE && is_scalar)
               dst_dp++;
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               if (j)
                  printf("; ");
               frB_dp = (unsigned long long *)&spec_fargs[i + j];
               printf("%s(%016llx)", test_group.op, *frB_dp);
               if (estimate) {
                  Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
                  printf(" ==> %s)", res ? "PASS" : "FAIL");
                  /* For debugging . . .
                   printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
                   */
               } else {
                  vx_fp_test_type type = test_group.type;
                  switch (type) {
                     case VX_SCALAR_CONV_TO_WORD:
                        // only the low 32 bits of the doubleword are printed
                        printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
                        break;
                     case VX_CONV_TO_SINGLE:
                        // only the high 32 bits of the doubleword are printed
                        printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
                        break;
                     default:  // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
                        printf(" = %016llx", dst_dp[j]);
                  }
               }
            }
            printf("\n");
         } else {
            int j;
            unsigned int * frB_sp, * dst_sp = NULL;
            unsigned long long * dst_dp = NULL;
            if (sparse_sp)
               loops = 2;
            for (j = 0; j < loops; j++) {
               inB = (void *)&spec_sp_fargs[i + j];
               // copy single precision FP into vector element j
               if (sparse_sp) {
                  // one value per doubleword; on LE it goes in the upper-addressed word
                  if (isLE)
                     memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
                  else
                     memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
               } else {
                  // scalar ops on LE: the input goes into the last word
                  if (isLE && is_scalar)
                     vecB_void_ptr += 12;
                  memcpy(vecB_void_ptr + (j * 4), inB, 4);
               }
            }
            // execute test insn
            (*func)();
            // results are doublewords for conversions to double, words otherwise
            if (test_group.type == VX_CONV_TO_DOUBLE) {
               dst_dp = (unsigned long long *) &vec_out;
               if (isLE && is_scalar)
                  dst_dp++;
            } else {
               dst_sp = (unsigned int *) &vec_out;
               if (isLE && is_scalar)
                  dst_sp += 3;
            }
            // print result
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               if (j)
                  printf("; ");
               frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
               printf("%s(%08x)", test_group.op, *frB_sp);
               if (estimate) {
                  Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
                  printf(" ==> %s)", res ? "PASS" : "FAIL");
               } else {
                  if (test_group.type == VX_CONV_TO_DOUBLE)
                        printf(" = %016llx", dst_dp[j]);
                  else
                  /* Special case: Current VEX implementation for fsqrts (single precision)
                   * uses the same implementation as that used for double precision fsqrt.
                   * However, I've found that for xvsqrtsp, the result from that implementation
                   * may be off by the two LSBs.  Generally, even this small inaccuracy can cause the
                   * output to appear very different if you end up with a carry.  But for the given
                   * inputs in this testcase, we can simply mask out these bits.
                   */
                     printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
               }
            }
            printf("\n");
         }
      }
      k++;
      printf( "\n" );
   }
}
1236 
test_int_to_fp_convert(void)1237 static void test_int_to_fp_convert(void)
1238 {
1239    test_func_t func;
1240    int k;
1241    k = 0;
1242 
1243    while ((func = intToFp_tests[k].test_func)) {
1244       int idx, i;
1245       vx_intToFp_test_t test_group = intToFp_tests[k];
1246       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1247       Bool sparse_sp = False;
1248       int stride = dp ? 2 : 4;
1249       int loops = stride;
1250 
1251       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1252        *    |___ int___|_Unused_|___int___|__Unused__|   // for vector op
1253        *                     or
1254        * We need to adjust stride from '4' to '2', since we'll only be loading
1255        * two values per loop into the input register.
1256        */
1257       if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1258          sparse_sp = True;
1259          stride = 2;
1260       }
1261 
1262       for (i = 0; i < test_group.num_tests; i+=stride) {
1263          unsigned int * pv;
1264          void * inB;
1265 
1266          pv = (unsigned int *)&vec_out;
1267          // clear vec_out
1268          for (idx = 0; idx < 4; idx++, pv++)
1269             *pv = 0;
1270 
1271          if (dp) {
1272             int j;
1273             unsigned long long  *dst_dw, * targs = test_group.targs;
1274             for (j = 0; j < loops; j++) {
1275                inB = (void *)&targs[i + j];
1276                // copy doubleword into vector element i
1277                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1278             }
1279             // execute test insn
1280             (*func)();
1281             dst_dw = (unsigned long long *) &vec_out;
1282             printf("#%d: %s ", i/stride, test_group.name);
1283             for (j = 0; j < loops; j++) {
1284                if (j)
1285                   printf("; ");
1286                printf("conv(%016llx)", targs[i + j]);
1287 
1288                if (test_group.type == VX_CONV_TO_SINGLE)
1289                   printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1290                else
1291                   printf(" = %016llx", dst_dw[j]);
1292             }
1293             printf("\n");
1294          } else {
1295             int j;
1296             unsigned int * dst_sp = NULL;
1297             unsigned int * targs = test_group.targs;
1298             unsigned long long * dst_dp = NULL;
1299             void * vecB_void_ptr = (void *)&vec_inB;
1300             if (sparse_sp)
1301                loops = 2;
1302             for (j = 0; j < loops; j++) {
1303                inB = (void *)&targs[i + j];
1304                // copy single word into vector element i
1305                if (sparse_sp) {
1306                   if (isLE)
1307                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1308                   else
1309                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1310                } else {
1311                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1312                }
1313             }
1314             // execute test insn
1315             (*func)();
1316             if (test_group.type == VX_CONV_TO_DOUBLE)
1317                dst_dp = (unsigned long long *) &vec_out;
1318             else
1319                dst_sp = (unsigned int *) &vec_out;
1320             // print result
1321             printf("#%d: %s ", i/stride, test_group.name);
1322             for (j = 0; j < loops; j++) {
1323                if (j)
1324                   printf("; ");
1325                printf("conv(%08x)", targs[i + j]);
1326                if (test_group.type == VX_CONV_TO_DOUBLE)
1327                   printf(" = %016llx", dst_dp[j]);
1328                else
1329                   printf(" = %08x", dst_sp[j]);
1330             }
1331             printf("\n");
1332          }
1333       }
1334       k++;
1335       printf( "\n" );
1336    }
1337 }
1338 
1339 
1340 
// The div doubleword test data: one { r14 operand, r15 operand } pair per
// row, as loaded by test_div_extensions().
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4 },
   { 4,                     -3 },
   { 4,                     4 },
   { 4,                     -5 },
   { 3,                     8 },
   { 0x8000000000000000ULL, 0xa },
   { 0x50c,                 -1 },
   { 0x50c,                 -4096 },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0 },
   { 0,                     2 },
   { 0x77,                  0xa3499 }
};
// Number of operand pairs (rows) in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1358 
// The div word test data: one { r14 operand, r15 operand } pair per row,
// as loaded by test_div_extensions().
unsigned int div_w_tdata[6][2] = {
   { 0,          2 },
   { 2,          0 },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5 },
   { 77,         66 },
   { 5,          0xfabc1234 }
};
// Number of operand pairs (rows) in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1369 
/* Describes one divide-extended instruction family run by
 * test_div_extensions().
 */
typedef struct div_ext_test
{
   test_func_t test_func;        // executes the instruction; NULL marks the end of the table
   const char *name;             // mnemonic printed with each result ("." is appended for the record form)
   int num_tests;                // number of operand pairs taken from the test data array
   div_type_t div_type;          // DIV_OE selects the overflow-enabled form; anything else the base form
   precision_type_t precision;   // DOUBLE_TEST: operands from div_dw_tdata, otherwise from div_w_tdata
} div_ext_test_t;
1378 
1379 static div_ext_test_t div_tests[] = {
1380 #ifdef __powerpc64__
1381                                    { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1382                                    { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1383 #endif
1384                                    { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1385                                    { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1386                                    { NULL, NULL, 0, 0, 0 }
1387 };
1388 
test_div_extensions(void)1389 static void test_div_extensions(void)
1390 {
1391    test_func_t func;
1392    int k;
1393    k = 0;
1394 
1395    while ((func = div_tests[k].test_func)) {
1396       int i, repeat = 1;
1397       div_ext_test_t test_group = div_tests[k];
1398       do_dot = False;
1399 
1400 again:
1401       for (i = 0; i < test_group.num_tests; i++) {
1402          unsigned int condreg;
1403 
1404          if (test_group.div_type == DIV_OE)
1405             do_OE = True;
1406          else
1407             do_OE = False;
1408 
1409          if (test_group.precision == DOUBLE_TEST) {
1410             r14 = div_dw_tdata[i][0];
1411             r15 = div_dw_tdata[i][1];
1412          } else {
1413             r14 = div_w_tdata[i][0];
1414             r15 = div_w_tdata[i][1];
1415          }
1416          // execute test insn
1417          (*func)();
1418          condreg = (div_flags & 0xf0000000) >> 28;
1419          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1420          if (test_group.precision == DOUBLE_TEST) {
1421             printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1422                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1423          } else {
1424             printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1425                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1426          }
1427          printf(" CR=%x; XER=%x\n", condreg, div_xer);
1428       }
1429       printf("\n");
1430       if (repeat) {
1431          repeat = 0;
1432          do_dot = True;
1433          goto again;
1434       }
1435       k++;
1436       printf( "\n" );
1437    }
1438 }
1439 
1440 
/* Run every instruction listed in vx_tdivORtsqrt_tests and print the
 * condition-register field it produced.
 *
 * Entries with an argument table (targs != NULL) take two operands, loaded
 * through setup_dp_fp_args()/setup_sp_fp_args() and printed through
 * print_vector_fp_result(); the others take one operand read directly from
 * spec_fargs / spec_sp_fargs.  After each execution bits 0x0f000000 of the
 * value obtained by GET_CR are extracted and printed.
 */
static void test_vx_tdivORtsqrt(void)
{
   test_func_t func;
   int k, crx;
   unsigned int flags;
   k = 0;
   do_dot = False;
   build_special_fargs_table();

   while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
      int idx, i;
      vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
      Bool two_args = test_group.targs ?  True : False;
      // stride: inputs consumed per execution; loops: inputs loaded/printed
      int stride = dp ? 2 : 4;
      int loops = is_scalar ? 1 : stride;
      stride = is_scalar ? 1: stride;

      for (i = 0; i < test_group.num_tests; i+=stride) {
         unsigned int * pv;
         void * inB, * vecB_void_ptr = (void *)&vec_inB;

         pv = (unsigned int *)&vec_out;
         // clear vec_out
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            int j;
            unsigned long long * frB_dp;
            if (two_args) {
               setup_dp_fp_args(&test_group.targs[i], False);
            } else {
               for (j = 0; j < loops; j++) {
                  inB = (void *)&spec_fargs[i + j];
                  // copy double precision FP into vector element j
                  // (scalar ops on LE: the input goes into the second doubleword)
                  if (isLE && is_scalar)
                     vecB_void_ptr += 8;
                  memcpy(vecB_void_ptr + (j * 8), inB, 8);
               }
            }
            // execute test insn
            // Must do set/get of CRs immediately before/after calling the asm func
            // to avoid CRs being modified by other instructions.
            SET_FPSCR_ZERO;
            SET_CR_XER_ZERO;
            (*func)();
            GET_CR(flags);
            // assumes using CR1
            crx = (flags & 0x0f000000) >> 24;
            if (two_args) {
               print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
            } else {
               printf("#%d: %s ", i/stride, test_group.name);
               for (j = 0; j < loops; j++) {
                  if (j)
                     printf("; ");
                  frB_dp = (unsigned long long *)&spec_fargs[i + j];
                  printf("%s(%016llx)", test_group.op, *frB_dp);
               }
               printf( " ? %x (CRx)\n", crx);
            }
         } else {
            int j;
            unsigned int * frB_sp;
            if (two_args) {
               setup_sp_fp_args(&test_group.targs[i], False);
            } else {
               for (j = 0; j < loops; j++) {
                  inB = (void *)&spec_sp_fargs[i + j];
                  // copy single precision FP into vector element j
                  memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
               }
            }
            // execute test insn
            // (same set / call / get ordering requirement as the double precision path)
            SET_FPSCR_ZERO;
            SET_CR_XER_ZERO;
            (*func)();
            GET_CR(flags);
            crx = (flags & 0x0f000000) >> 24;
            // print result
            if (two_args) {
               print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
            } else {
               printf("#%d: %s ", i/stride, test_group.name);
               for (j = 0; j < loops; j++) {
                  if (j)
                     printf("; ");
                  frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
                  printf("%s(%08x)", test_group.op, *frB_sp);
               }
               printf( " ? %x (CRx)\n", crx);
            }
         }
      }
      k++;
      printf( "\n" );
   }
}
1541 
1542 
test_ftsqrt(void)1543 static void test_ftsqrt(void)
1544 {
1545    int i, crx;
1546    unsigned int flags;
1547    unsigned long long * frbp;
1548    build_special_fargs_table();
1549 
1550 
1551    for (i = 0; i < nb_special_fargs; i++) {
1552       f14 = spec_fargs[i];
1553       frbp = (unsigned long long *)&spec_fargs[i];
1554       SET_FPSCR_ZERO;
1555       SET_CR_XER_ZERO;
1556       __asm__ __volatile__ ("ftsqrt           cr1, %0" : : "d" (f14));
1557       GET_CR(flags);
1558       crx = (flags & 0x0f000000) >> 24;
1559       printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1560    }
1561    printf( "\n" );
1562 }
1563 
1564 static void
test_popcntw(void)1565 test_popcntw(void)
1566 {
1567 #ifdef __powerpc64__
1568    uint64_t res;
1569    unsigned long long src = 0x9182736405504536ULL;
1570    r14 = src;
1571    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1572    printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1573 #else
1574    uint32_t res;
1575    unsigned int src = 0x9182730E;
1576    r14 = src;
1577    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1578    printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1579 #endif
1580    printf( "\n" );
1581 }
1582 
1583 
1584 static test_table_t
1585          all_tests[] =
1586 {
1587 
1588                     { &test_vsx_one_fp_arg,
1589                       "Test VSX vector and scalar single argument instructions"} ,
1590                     { &test_int_to_fp_convert,
1591                       "Test VSX vector integer to float conversion instructions" },
1592                     { &test_div_extensions,
1593                        "Test div extensions" },
1594                     { &test_ftsqrt,
1595                        "Test ftsqrt instruction" },
1596                     { &test_vx_tdivORtsqrt,
1597                        "Test vector and scalar tdiv and tsqrt instructions" },
1598                     { &test_popcntw,
1599                        "Test popcntw instruction" },
1600                     { NULL, NULL }
1601 };
1602 #endif // HAS_VSX
1603 
/* Program entry point.  When built with HAS_VSX, prints the heading of each
 * category in all_tests, runs it, and finally releases the special-value
 * tables.  Without HAS_VSX it does nothing.  Always returns 0; argc and argv
 * are not used.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   int idx;

   for (idx = 0; all_tests[idx].test_category != NULL; idx++) {
      test_table_t entry = all_tests[idx];

      printf( "%s\n", entry.name );
      (*entry.test_category)();
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}
1627