1 /* Copyright (C) 2011 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23 #ifdef HAS_VSX
24
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <malloc.h>
30 #include <altivec.h>
31 #include <math.h>
32
/* Host word type: 64 bits when building for ppc64, 32 bits otherwise. */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */
38
/* isLE is 1 when VGP_ppc64le_linux is defined by the build, 0 otherwise. */
#if defined(VGP_ppc64le_linux)
#  define isLE 1
#else
#  define isLE 0
#endif
44
/* Minimal boolean type used throughout this file, with its two values. */
typedef unsigned char Bool;
#define True 1
#define False 0
/* Global register variables: each C name is bound to the machine register
 * named in the asm label.  The divide tests in this file pass r14 and r15
 * as inline-asm inputs and receive the result in r17.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* CR and XER contents captured with GET_CR_XER right after a divide test. */
static volatile unsigned int div_flags, div_xer;
58
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Load the condition register from _arg (mtcr).
 * NOTE: the expansion already ends in ';'.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Load XER from _arg (mtxer).
 * NOTE: the expansion already ends in ';'.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into the lvalue _lval (mfcr). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into the lvalue _lval (mfxer). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR first, then XER, into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Convenience forms that write zero to CR, to XER, or to both. */
#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Write the bit pattern of 0.0 to the FPSCR fields selected by mask 0xFF
 * (mtfsf).
 */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
89
90
/* Signature shared by every test routine: no arguments, no return value. */
typedef void (*test_func_t)(void);
/* Short alias for struct test_table, whose members are declared later in
 * this file.
 */
typedef struct test_table test_table_t;
93
94
95 /* These functions below that construct a table of floating point
96 * values were lifted from none/tests/ppc32/jm-insns.c.
97 */
98
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit IEEE-754 double-precision bit pattern and store it in
 * the 8 bytes at farg.
 *
 *   farg  destination; must point to at least 8 writable bytes
 *   s     sign, placed in bit 63 (0 or 1)
 *   _exp  biased exponent, placed in bits 62..52
 *   mant  fraction, OR-ed into the low bits (callers pass at most 52 bits)
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Callers hand in pointers to double, so the pattern is copied with
    * memcpy: storing through a uint64_t lvalue would violate the strict
    * aliasing rules.
    */
   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* uint64_t is not necessarily unsigned long long, so cast for %llx. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
115
/* Assemble a 32-bit IEEE-754 single-precision bit pattern and store it in
 * the 4 bytes at farg.
 *
 *   farg  destination; must point to at least 4 writable bytes
 *   s     sign, placed in bit 31 (0 or 1)
 *   _exp  biased exponent, placed in bits 30..23
 *   mant  fraction, OR-ed into the low bits (callers pass at most 23 bits)
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   const uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   /* The destination is a float object, so copy the pattern with memcpy
    * instead of storing through a uint32_t lvalue (strict aliasing).
    */
   memcpy(farg, &bits, sizeof bits);
}
123
124
/* One operand pair for a two-argument test: both members are indices into
 * the special-value tables (spec_fargs / spec_sp_fargs).
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first (A) operand  */
   int frb_idx;   /* index of the second (B) operand */
} fp_test_args_t;


/* Operand pairs for the two-argument tests, 68 entries in total. */
fp_test_args_t two_arg_fp_tests[] = {
   { 8,  8}, { 8, 14}, {15, 16}, { 8,  5}, { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   {14,  8}, {14, 14}, {14,  6}, {14,  5}, {14,  4}, {14,  7}, {14,  9}, {14, 11},
   { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5}, { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5}, { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5}, { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5}, { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   {10,  8}, {10, 14}, {12,  6}, {12,  5}, {10,  4}, {10,  7}, {10,  9}, {10, 11},
   {12,  8}, {12, 14}, {12,  6}, {15, 16}, {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
201
202
static int nb_special_fargs;     /* number of entries in the two tables below */
static double * spec_fargs;      /* double-precision special values           */
static float * spec_sp_fargs;    /* single-precision counterparts             */

/* Allocate and fill spec_fargs and spec_sp_fargs, and set nb_special_fargs.
 * The tables are built once; later calls return immediately.  If either
 * allocation fails, a message is written to stderr and the program exits.
 */
static void build_special_fargs_table(void)
{
   /*
    * Entry  Sign Exp   fraction            Special value
    *   0      0   3fd   0x8000000000000ULL  Positive finite number
    *   1      0   404   0xf000000000000ULL  ...
    *   2      0   001   0x8000000b77501ULL  ...
    *   3      0   7fe   0x800000000051bULL  ...
    *   4      0   012   0x3214569900000ULL  ...
    *   5      0   000   0x0000000000000ULL  +0.0 (+zero)
    *   6      1   000   0x0000000000000ULL  -0.0 (-zero)
    *   7      0   7ff   0x0000000000000ULL  +infinity
    *   8      1   7ff   0x0000000000000ULL  -infinity
    *   9      0   7ff   0x7FFFFFFFFFFFFULL  +SNaN
    *   10     1   7ff   0x7FFFFFFFFFFFFULL  -SNaN
    *   11     0   7ff   0x8000000000000ULL  +QNaN
    *   12     1   7ff   0x8000000000000ULL  -QNaN
    *   13     1   000   0x8340000078000ULL  Denormalized val (zero exp and non-zero fraction)
    *   14     1   40d   0x0650f5a07b353ULL  Negative finite number
    *   15     0   412   0x32585a9900000ULL  A few more positive finite numbers
    *   16     0   413   0x82511a2000000ULL  ...
    *   17     0   403   0x12ef5a9300000ULL  ...
    *   18     0   405   0x14bf5d2300000ULL  ...
    *   19     0   409   0x76bf982440000ULL  ...
    */
   static const struct {
      int      s;
      uint16_t exp;
      uint64_t mant;
   } dp_values[] = {
      { 0, 0x3fd, 0x8000000000000ULL },   /* #0  */
      { 0, 0x404, 0xf000000000000ULL },   /* #1  */
      { 0, 0x001, 0x8000000b77501ULL },   /* #2  */
      { 0, 0x7fe, 0x800000000051bULL },   /* #3  */
      { 0, 0x012, 0x3214569900000ULL },   /* #4  */
      { 0, 0x000, 0x0000000000000ULL },   /* #5  +0.0      */
      { 1, 0x000, 0x0000000000000ULL },   /* #6  -0.0      */
      { 0, 0x7FF, 0x0000000000000ULL },   /* #7  +infinity */
      { 1, 0x7FF, 0x0000000000000ULL },   /* #8  -infinity */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #9  +SNaN     */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #10 -SNaN     */
      { 0, 0x7FF, 0x8000000000000ULL },   /* #11 +QNaN     */
      { 1, 0x7FF, 0x8000000000000ULL },   /* #12 -QNaN     */
      { 1, 0x000, 0x8340000078000ULL },   /* #13 denormalized value     */
      { 1, 0x40d, 0x0650f5a07b353ULL },   /* #14 negative finite number */
      { 0, 0x412, 0x32585a9900000ULL },   /* #15 */
      { 0, 0x413, 0x82511a2000000ULL },   /* #16 */
      { 0, 0x403, 0x12ef5a9300000ULL },   /* #17 */
      { 0, 0x405, 0x14bf5d2300000ULL },   /* #18 */
      { 0, 0x409, 0x76bf982440000ULL }    /* #19 */
   };
   const int count = (int)(sizeof dp_values / sizeof dp_values[0]);

   /* Single-precision SNaN pattern used for entries #9 and #10: exponent
    * all ones, fraction 0x3FFFFF (bit 22 clear).
    */
   const uint16_t sp_snan_exp  = 0xff;
   const uint32_t sp_snan_mant = 0x3FFFFF;
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc( count * sizeof *spec_fargs );
   spec_sp_fargs = malloc( count * sizeof *spec_sp_fargs );
   if (!spec_fargs || !spec_sp_fargs) {
      fprintf(stderr, "build_special_fargs_table: out of memory. Exiting\n");
      exit(1);
   }

   for (i = 0; i < count; i++) {
      register_farg(&spec_fargs[i],
                    dp_values[i].s, dp_values[i].exp, dp_values[i].mant);

      if (i == 9 || i == 10) {
         /* When src is a SNaN, it's converted to a QNaN first before rounding
          * to single-precision, so the double-precision value cannot simply
          * be assigned to the single-precision slot.  The bits are set
          * explicitly instead.
          */
         register_sp_farg(&spec_sp_fargs[i],
                          dp_values[i].s, sp_snan_exp, sp_snan_mant);
      } else {
         spec_sp_fargs[i] = spec_fargs[i];
      }
   }
   nb_special_fargs = count;
}
398
399
/* Associates a test routine with a printable name. */
struct test_table
{
   test_func_t test_category;   /* routine to call   */
   char * name;                 /* name of the entry */
};
405
/* Type of input for floating point operations. */
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Category tag stored in each entry of the VSX test tables. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,
   VX_DEFAULT             = 4
} vx_fp_test_type;
419
420 static vector unsigned int vec_out, vec_inA, vec_inB;
421
422 /* This function is for checking the reciprocal and reciprocal square root
423 * estimate instructions.
424 */
check_estimate(precision_type_t type,Bool is_rsqrte,int idx,int output_vec_idx)425 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
426 {
427 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
428 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
429 * does an actual reciprocal calculation versus estimation, so the answer we get back from
430 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
431 * precision) and the estimate may still be within expected tolerances. On top of that,
432 * we can't count on these estimates always being the same across implementations.
433 * For example, with the fre[s] instruction (which should be correct to within one part
434 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
435 * one implementation could return 1.0111_1111_0000 and another implementation could return
436 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
437 * single bit in common.
438 *
439 * The upshot is we can't validate the VEX output for these instructions by comparing against
440 * stored bit patterns. We must check that the result is within expected tolerances.
441 */
442
443
444 /* A mask to be used for validation as a last resort.
445 * Only use 12 bits of precision for reasons discussed above.
446 */
447 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
448 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
449
450 Bool result = False;
451 Bool dp_test = type == DOUBLE_TEST;
452 double src_dp, res_dp;
453 float src_sp, res_sp;
454 src_dp = res_dp = 0;
455 src_sp = res_sp = 0;
456 #define SRC (dp_test ? src_dp : src_sp)
457 #define RES (dp_test ? res_dp : res_sp)
458 Bool src_is_negative = False;
459 Bool res_is_negative = False;
460 unsigned long long * dst_dp = NULL;
461 unsigned int * dst_sp = NULL;
462 if (dp_test) {
463 unsigned long long * src_dp_ull;
464 dst_dp = (unsigned long long *) &vec_out;
465 src_dp = spec_fargs[idx];
466 src_dp_ull = (unsigned long long *) &src_dp;
467 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
468 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
469 memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
470 } else {
471 unsigned int * src_sp_uint;
472 dst_sp = (unsigned int *) &vec_out;
473 src_sp = spec_sp_fargs[idx];
474 src_sp_uint = (unsigned int *) &src_sp;
475 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
476 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
477 memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
478 }
479
480 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
481 if (isnan(SRC))
482 return isnan(RES);
483 if (fpclassify(SRC) == FP_ZERO)
484 return isinf(RES);
485 if (!src_is_negative && isinf(SRC))
486 return !res_is_negative && (fpclassify(RES) == FP_ZERO);
487 if (is_rsqrte) {
488 if (src_is_negative)
489 return isnan(RES);
490 } else {
491 if (src_is_negative && isinf(SRC))
492 return res_is_negative && (fpclassify(RES) == FP_ZERO);
493 }
494 if (dp_test) {
495 double calc_diff;
496 double real_diff;
497 double recip_divisor;
498 double div_result;
499 double calc_diff_tmp;
500
501 if (is_rsqrte)
502 recip_divisor = sqrt(src_dp);
503 else
504 recip_divisor = src_dp;
505
506 div_result = 1.0/recip_divisor;
507 calc_diff_tmp = recip_divisor * 16384.0;
508 if (isnormal(calc_diff_tmp)) {
509 calc_diff = fabs(1.0/calc_diff_tmp);
510 real_diff = fabs(res_dp - div_result);
511 result = ( ( res_dp == div_result )
512 || ( real_diff <= calc_diff ) );
513 } else {
514 /* Unable to compute theoretical difference, so we fall back to masking out
515 * un-precise bits.
516 */
517 unsigned long long * div_result_dp = (unsigned long long *) &div_result;
518 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
519 }
520 /* For debug use . . .
521 if (!result) {
522 unsigned long long * dv = &div_result;
523 unsigned long long * rd = &real_diff;
524 unsigned long long * cd = &calc_diff;
525 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
526 *dv, *rd, *cd);
527 }
528 */
529 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
530 float calc_diff;
531 float real_diff;
532 float div_result;
533 float calc_diff_tmp;
534 float recip_divisor = sqrt(src_sp);
535
536 div_result = 1.0/recip_divisor;
537 calc_diff_tmp = recip_divisor * 16384.0;
538 if (isnormal(calc_diff_tmp)) {
539 calc_diff = fabsf(1.0/calc_diff_tmp);
540 real_diff = fabsf(res_sp - div_result);
541 result = ( ( res_sp == div_result )
542 || ( real_diff <= calc_diff ) );
543 } else {
544 /* Unable to compute theoretical difference, so we fall back to masking out
545 * un-precise bits.
546 */
547 unsigned int * div_result_sp = (unsigned int *) &div_result;
548 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
549 }
550 /* For debug use . . .
551 if (!result) {
552 unsigned long long * dv = &div_result;
553 unsigned long long * rd = &real_diff;
554 unsigned long long * cd = &calc_diff;
555 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
556 *dv, *rd, *cd);
557 }
558 */
559 }
560 return result;
561 }
562
/* Descriptor of one VSX floating-point instruction test. */
typedef struct vx_fp_test
{
   test_func_t test_func;        /* routine that executes the instruction          */
   const char * name;            /* instruction mnemonic, printed with the results */
   fp_test_args_t * targs;       /* operand index pairs, or NULL                   */
   int num_tests;                /* number of test cases for this instruction      */
   precision_type_t precision;   /* SINGLE_TEST or DOUBLE_TEST                     */
   vx_fp_test_type type;         /* category of the test                           */
   const char * op;              /* operation label printed between the operands   */
} vx_fp_test_t;


/* When set, the divide tests run the record ('.') form of the instruction
 * and print_vector_fp_result appends "." to the printed name.
 */
static Bool do_dot;
576
test_xvredp(void)577 static void test_xvredp(void)
578 {
579 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
580 }
581
test_xsredp(void)582 static void test_xsredp(void)
583 {
584 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
585 }
586
test_xvrsqrtedp(void)587 static void test_xvrsqrtedp(void)
588 {
589 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
590 }
591
test_xsrsqrtedp(void)592 static void test_xsrsqrtedp(void)
593 {
594 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
595 }
596
test_xvrsqrtesp(void)597 static void test_xvrsqrtesp(void)
598 {
599 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
600 }
601
test_xstsqrtdp(void)602 static void test_xstsqrtdp(void)
603 {
604 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
605 }
606
test_xvtsqrtdp(void)607 static void test_xvtsqrtdp(void)
608 {
609 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
610 }
611
test_xvtsqrtsp(void)612 static void test_xvtsqrtsp(void)
613 {
614 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
615 }
616
test_xvsqrtdp(void)617 static void test_xvsqrtdp(void)
618 {
619 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
620 }
621
test_xvsqrtsp(void)622 static void test_xvsqrtsp(void)
623 {
624 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
625 }
626
test_xvtdivdp(void)627 static void test_xvtdivdp(void)
628 {
629 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
630 }
631
test_xvtdivsp(void)632 static void test_xvtdivsp(void)
633 {
634 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
635 }
636
test_xscvdpsp(void)637 static void test_xscvdpsp(void)
638 {
639 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
640 }
641
test_xscvdpuxws(void)642 static void test_xscvdpuxws(void)
643 {
644 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
645 }
646
test_xscvspdp(void)647 static void test_xscvspdp(void)
648 {
649 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
650 }
651
test_xvcvdpsp(void)652 static void test_xvcvdpsp(void)
653 {
654 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
655 }
656
test_xvcvdpuxds(void)657 static void test_xvcvdpuxds(void)
658 {
659 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
660 }
661
test_xvcvdpuxws(void)662 static void test_xvcvdpuxws(void)
663 {
664 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
665 }
666
test_xvcvspdp(void)667 static void test_xvcvspdp(void)
668 {
669 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
670 }
671
test_xvcvspsxds(void)672 static void test_xvcvspsxds(void)
673 {
674 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
675 }
676
test_xvcvspuxds(void)677 static void test_xvcvspuxds(void)
678 {
679 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
680 }
681
test_xvcvdpsxds(void)682 static void test_xvcvdpsxds(void)
683 {
684 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
685 }
686
test_xvcvspuxws(void)687 static void test_xvcvspuxws(void)
688 {
689 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
690 }
691
test_xvcvsxddp(void)692 static void test_xvcvsxddp(void)
693 {
694 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
695 }
696
test_xvcvuxddp(void)697 static void test_xvcvuxddp(void)
698 {
699 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
700 }
701
test_xvcvsxdsp(void)702 static void test_xvcvsxdsp(void)
703 {
704 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
705 }
706
test_xvcvuxdsp(void)707 static void test_xvcvuxdsp(void)
708 {
709 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
710 }
711
test_xvcvsxwdp(void)712 static void test_xvcvsxwdp(void)
713 {
714 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
715 }
716
test_xvcvuxwdp(void)717 static void test_xvcvuxwdp(void)
718 {
719 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
720 }
721
test_xvcvsxwsp(void)722 static void test_xvcvsxwsp(void)
723 {
724 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
725 }
726
test_xvcvuxwsp(void)727 static void test_xvcvuxwsp(void)
728 {
729 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
730 }
731
test_xsrdpic(void)732 static void test_xsrdpic(void)
733 {
734 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
735 }
736
test_xsrdpiz(void)737 static void test_xsrdpiz(void)
738 {
739 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
740 }
741
test_xsrdpi(void)742 static void test_xsrdpi(void)
743 {
744 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
745 }
746
test_xvabsdp(void)747 static void test_xvabsdp(void)
748 {
749 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
750 }
751
test_xvnabsdp(void)752 static void test_xvnabsdp(void)
753 {
754 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
755 }
756
test_xvnegdp(void)757 static void test_xvnegdp(void)
758 {
759 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
760 }
761
test_xvabssp(void)762 static void test_xvabssp(void)
763 {
764 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
765 }
766
test_xvnabssp(void)767 static void test_xvnabssp(void)
768 {
769 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
770 }
771
test_xvrdpi(void)772 static void test_xvrdpi(void)
773 {
774 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
775 }
776
test_xvrdpic(void)777 static void test_xvrdpic(void)
778 {
779 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
780 }
781
test_xvrdpim(void)782 static void test_xvrdpim(void)
783 {
784 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
785 }
786
test_xvrdpip(void)787 static void test_xvrdpip(void)
788 {
789 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
790 }
791
test_xvrdpiz(void)792 static void test_xvrdpiz(void)
793 {
794 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
795 }
796
test_xvrspi(void)797 static void test_xvrspi(void)
798 {
799 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
800 }
801
test_xvrspic(void)802 static void test_xvrspic(void)
803 {
804 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
805 }
806
test_xvrspim(void)807 static void test_xvrspim(void)
808 {
809 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
810 }
811
test_xvrspip(void)812 static void test_xvrspip(void)
813 {
814 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
815 }
816
test_xvrspiz(void)817 static void test_xvrspiz(void)
818 {
819 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
820 }
821
822 static vx_fp_test_t
823 vsx_one_fp_arg_tests[] = {
824 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
825 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
826 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
827 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
828 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
829 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
830 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
831 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
832 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
833 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
834 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
835 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
836 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
837 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
838 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
839 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
840 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
841 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
842 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
843 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
844 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
845 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
846 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
847 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
848 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
849 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
850 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
851 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
852 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
853 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
854 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
855 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
856 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
857 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
858 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
859 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
860 { NULL, NULL, NULL, 0, 0, 0, NULL}
861 };
862
863 static vx_fp_test_t
864 vx_tdivORtsqrt_tests[] = {
865 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
866 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
867 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
868 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
869 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
870 { NULL, NULL, NULL, 0 , 0, 0, NULL}
871 };
872
/* 64-bit integer inputs for the integer-to-floating-point tests. */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};

/* 32-bit integer inputs for the integer-to-floating-point tests. */
static unsigned int singleWord[] = {
   0x00000000U,
   0xffff0000U,
   0x0000ffffU,
   0xffffffffU,
   0x89a73522U,
   0x01020304U,
   0x0000abcdU,
   0x11223344U
};
892
893 typedef struct vx_intToFp_test
894 {
895 test_func_t test_func;
896 const char * name;
897 void * targs;
898 int num_tests;
899 precision_type_t precision;
900 vx_fp_test_type type;
901 } vx_intToFp_test_t;
902
903 static vx_intToFp_test_t
904 intToFp_tests[] = {
905 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
906 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
907 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
908 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
909 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
910 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
911 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
912 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
913 { NULL, NULL, NULL, 0, 0 }
914 };
915
/* When set, the divide tests run the OE ('o') form of the instruction. */
static Bool do_OE;
/* Bit flags that the divide tests OR together to select an instruction form. */
typedef enum {
   DIV_BASE = 1,   /* always present                */
   DIV_OE = 2,     /* added when do_OE is set       */
   DIV_DOT = 4,    /* added when do_dot is set      */
} div_type_t;
922 /* Possible divde type combinations are:
923 * - base
924 * - base+dot
925 * - base+OE
926 * - base+OE+dot
927 */
#ifdef __powerpc64__
/* Execute one form of divdeu with r14 and r15 as inputs and r17 as output,
 * then store the resulting CR and XER in div_flags and div_xer.  do_OE
 * selects the 'o' form and do_dot the '.' form.  CR and XER are zeroed
 * immediately before the instruction.
 *
 * The record forms write CR field 0 and the OE forms write XER, so those
 * registers are declared as clobbered.
 */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "cr0");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "cr0", "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
964
test_divwe(void)965 static void test_divwe(void)
966 {
967 int divwe_type = DIV_BASE;
968 if (do_OE)
969 divwe_type |= DIV_OE;
970 if (do_dot)
971 divwe_type |= DIV_DOT;
972
973 switch (divwe_type) {
974 case 1:
975 SET_CR_XER_ZERO;
976 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
977 GET_CR_XER(div_flags, div_xer);
978 break;
979 case 3:
980 SET_CR_XER_ZERO;
981 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
982 GET_CR_XER(div_flags, div_xer);
983 break;
984 case 5:
985 SET_CR_XER_ZERO;
986 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
987 GET_CR_XER(div_flags, div_xer);
988 break;
989 case 7:
990 SET_CR_XER_ZERO;
991 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
992 GET_CR_XER(div_flags, div_xer);
993 break;
994 default:
995 fprintf(stderr, "Invalid divweu type. Exiting\n");
996 exit(1);
997 }
998 }
999
1000
/* Descriptor of a test that carries only a routine, a name and a precision. */
typedef struct simple_test {
   test_func_t test_func;        /* routine that executes the test */
   char * name;                  /* name of the test               */
   precision_type_t precision;   /* SINGLE_TEST or DOUBLE_TEST     */
} simple_test_t;
1006
1007
/* Load four single-precision operand pairs into the input vectors.
 *
 *   targs        first of four consecutive operand index pairs
 *   swap_inputs  when True, the B operands go into vec_out instead of vec_inB
 *
 * Element i of vec_inA receives spec_sp_fargs[targs[i].fra_idx]; element i
 * of the B destination receives spec_sp_fargs[targs[i].frb_idx].
 */
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   /* Byte pointers: arithmetic on void * is a GNU extension, not standard C. */
   unsigned char * dst_a = (unsigned char *)&vec_inA;
   unsigned char * dst_b = swap_inputs ? (unsigned char *)&vec_out
                                       : (unsigned char *)&vec_inB;
   int i;

   for (i = 0; i < 4; i++, targs++) {
      // copy single precision FP into vector element i
      memcpy(dst_a + i * sizeof(float), &spec_sp_fargs[targs->fra_idx], sizeof(float));
      memcpy(dst_b + i * sizeof(float), &spec_sp_fargs[targs->frb_idx], sizeof(float));
   }
}
1025
/* Load two double-precision operand pairs into the input vectors.
 *
 *   targs        first of two consecutive operand index pairs
 *   swap_inputs  when True, the B operands go into vec_out instead of vec_inB
 *
 * Element i of vec_inA receives spec_fargs[targs[i].fra_idx]; element i of
 * the B destination receives spec_fargs[targs[i].frb_idx].
 */
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   /* Byte pointers: arithmetic on void * is a GNU extension, not standard C. */
   unsigned char * dst_a = (unsigned char *)&vec_inA;
   unsigned char * dst_b = swap_inputs ? (unsigned char *)&vec_out
                                       : (unsigned char *)&vec_inB;
   int i;

   for (i = 0; i < 2; i++, targs++) {
      // copy double precision FP into vector element i
      memcpy(dst_a + i * sizeof(double), &spec_fargs[targs->fra_idx], sizeof(double));
      memcpy(dst_b + i * sizeof(double), &spec_fargs[targs->frb_idx], sizeof(double));
   }
}
1043
1044 #define VX_NOT_CMP_OP 0xffffffff
print_vector_fp_result(unsigned int cc,vx_fp_test_t * test_group,int i,Bool print_vec_out)1045 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1046 {
1047 int a_idx, b_idx, k;
1048 char * name = malloc(20);
1049 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1050 int loops = dp ? 2 : 4;
1051 fp_test_args_t * targs = &test_group->targs[i];
1052 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1053 unsigned int * frA_sp, *frB_sp, * dst_sp;
1054 strcpy(name, test_group->name);
1055 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1056 for (k = 0; k < loops; k++) {
1057 a_idx = targs->fra_idx;
1058 b_idx = targs->frb_idx;
1059 if (k)
1060 printf(" AND ");
1061 if (dp) {
1062 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1063 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1064 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1065 } else {
1066 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1067 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1068 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1069 }
1070 targs++;
1071 }
1072 if (cc != VX_NOT_CMP_OP)
1073 printf(" ? cc=%x", cc);
1074
1075 if (print_vec_out) {
1076 if (dp) {
1077 dst_dp = (unsigned long long *) &vec_out;
1078 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1079 } else {
1080 dst_sp = (unsigned int *) &vec_out;
1081 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1082 }
1083 } else {
1084 printf("\n");
1085 }
1086 free(name);
1087 }
1088
1089
1090
test_vsx_one_fp_arg(void)1091 static void test_vsx_one_fp_arg(void)
1092 {
1093 test_func_t func;
1094 int k;
1095 k = 0;
1096 build_special_fargs_table();
1097
1098 while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1099 int idx, i;
1100 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1101 Bool estimate = (test_group.type == VX_ESTIMATE);
1102 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1103 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1104 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1105 Bool sparse_sp = False;
1106 int stride = dp ? 2 : 4;
1107 int loops = is_scalar ? 1 : stride;
1108 stride = is_scalar ? 1: stride;
1109
1110 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1111 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op
1112 * or
1113 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op
1114 *
1115 * For the vector op case, we need to adjust stride from '4' to '2', since
1116 * we'll only be loading two values per loop into the input register.
1117 */
1118 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1119 sparse_sp = True;
1120 stride = 2;
1121 }
1122
1123 for (i = 0; i < test_group.num_tests; i+=stride) {
1124 unsigned int * pv;
1125 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1126
1127 pv = (unsigned int *)&vec_out;
1128 // clear vec_out
1129 for (idx = 0; idx < 4; idx++, pv++)
1130 *pv = 0;
1131
1132 if (dp) {
1133 int j;
1134 unsigned long long * frB_dp, *dst_dp;
1135 for (j = 0; j < loops; j++) {
1136 inB = (void *)&spec_fargs[i + j];
1137 // copy double precision FP into vector element i
1138 if (isLE && is_scalar)
1139 vecB_void_ptr += 8;
1140 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1141 }
1142 // execute test insn
1143 (*func)();
1144 dst_dp = (unsigned long long *) &vec_out;
1145 if (isLE && is_scalar)
1146 dst_dp++;
1147 printf("#%d: %s ", i/stride, test_group.name);
1148 for (j = 0; j < loops; j++) {
1149 if (j)
1150 printf("; ");
1151 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1152 printf("%s(%016llx)", test_group.op, *frB_dp);
1153 if (estimate) {
1154 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1155 printf(" ==> %s)", res ? "PASS" : "FAIL");
1156 /* For debugging . . .
1157 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1158 */
1159 } else {
1160 vx_fp_test_type type = test_group.type;
1161 switch (type) {
1162 case VX_SCALAR_CONV_TO_WORD:
1163 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1164 break;
1165 case VX_CONV_TO_SINGLE:
1166 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1167 break;
1168 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1169 printf(" = %016llx", dst_dp[j]);
1170 }
1171 }
1172 }
1173 printf("\n");
1174 } else {
1175 int j;
1176 unsigned int * frB_sp, * dst_sp = NULL;
1177 unsigned long long * dst_dp = NULL;
1178 if (sparse_sp)
1179 loops = 2;
1180 for (j = 0; j < loops; j++) {
1181 inB = (void *)&spec_sp_fargs[i + j];
1182 // copy single precision FP into vector element i
1183 if (sparse_sp) {
1184 if (isLE)
1185 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1186 else
1187 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1188 } else {
1189 if (isLE && is_scalar)
1190 vecB_void_ptr += 12;
1191 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1192 }
1193 }
1194 // execute test insn
1195 (*func)();
1196 if (test_group.type == VX_CONV_TO_DOUBLE) {
1197 dst_dp = (unsigned long long *) &vec_out;
1198 if (isLE && is_scalar)
1199 dst_dp++;
1200 } else {
1201 dst_sp = (unsigned int *) &vec_out;
1202 if (isLE && is_scalar)
1203 dst_sp += 3;
1204 }
1205 // print result
1206 printf("#%d: %s ", i/stride, test_group.name);
1207 for (j = 0; j < loops; j++) {
1208 if (j)
1209 printf("; ");
1210 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1211 printf("%s(%08x)", test_group.op, *frB_sp);
1212 if (estimate) {
1213 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1214 printf(" ==> %s)", res ? "PASS" : "FAIL");
1215 } else {
1216 if (test_group.type == VX_CONV_TO_DOUBLE)
1217 printf(" = %016llx", dst_dp[j]);
1218 else
1219 /* Special case: Current VEX implementation for fsqrts (single precision)
1220 * uses the same implementation as that used for double precision fsqrt.
1221 * However, I've found that for xvsqrtsp, the result from that implementation
1222 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the
1223 * output to appear very different if you end up with a carry. But for the given
1224 * inputs in this testcase, we can simply mask out these bits.
1225 */
1226 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1227 }
1228 }
1229 printf("\n");
1230 }
1231 }
1232 k++;
1233 printf( "\n" );
1234 }
1235 }
1236
test_int_to_fp_convert(void)1237 static void test_int_to_fp_convert(void)
1238 {
1239 test_func_t func;
1240 int k;
1241 k = 0;
1242
1243 while ((func = intToFp_tests[k].test_func)) {
1244 int idx, i;
1245 vx_intToFp_test_t test_group = intToFp_tests[k];
1246 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1247 Bool sparse_sp = False;
1248 int stride = dp ? 2 : 4;
1249 int loops = stride;
1250
1251 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1252 * |___ int___|_Unused_|___int___|__Unused__| // for vector op
1253 * or
1254 * We need to adjust stride from '4' to '2', since we'll only be loading
1255 * two values per loop into the input register.
1256 */
1257 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1258 sparse_sp = True;
1259 stride = 2;
1260 }
1261
1262 for (i = 0; i < test_group.num_tests; i+=stride) {
1263 unsigned int * pv;
1264 void * inB;
1265
1266 pv = (unsigned int *)&vec_out;
1267 // clear vec_out
1268 for (idx = 0; idx < 4; idx++, pv++)
1269 *pv = 0;
1270
1271 if (dp) {
1272 int j;
1273 unsigned long long *dst_dw, * targs = test_group.targs;
1274 for (j = 0; j < loops; j++) {
1275 inB = (void *)&targs[i + j];
1276 // copy doubleword into vector element i
1277 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1278 }
1279 // execute test insn
1280 (*func)();
1281 dst_dw = (unsigned long long *) &vec_out;
1282 printf("#%d: %s ", i/stride, test_group.name);
1283 for (j = 0; j < loops; j++) {
1284 if (j)
1285 printf("; ");
1286 printf("conv(%016llx)", targs[i + j]);
1287
1288 if (test_group.type == VX_CONV_TO_SINGLE)
1289 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1290 else
1291 printf(" = %016llx", dst_dw[j]);
1292 }
1293 printf("\n");
1294 } else {
1295 int j;
1296 unsigned int * dst_sp = NULL;
1297 unsigned int * targs = test_group.targs;
1298 unsigned long long * dst_dp = NULL;
1299 void * vecB_void_ptr = (void *)&vec_inB;
1300 if (sparse_sp)
1301 loops = 2;
1302 for (j = 0; j < loops; j++) {
1303 inB = (void *)&targs[i + j];
1304 // copy single word into vector element i
1305 if (sparse_sp) {
1306 if (isLE)
1307 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1308 else
1309 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1310 } else {
1311 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1312 }
1313 }
1314 // execute test insn
1315 (*func)();
1316 if (test_group.type == VX_CONV_TO_DOUBLE)
1317 dst_dp = (unsigned long long *) &vec_out;
1318 else
1319 dst_sp = (unsigned int *) &vec_out;
1320 // print result
1321 printf("#%d: %s ", i/stride, test_group.name);
1322 for (j = 0; j < loops; j++) {
1323 if (j)
1324 printf("; ");
1325 printf("conv(%08x)", targs[i + j]);
1326 if (test_group.type == VX_CONV_TO_DOUBLE)
1327 printf(" = %016llx", dst_dp[j]);
1328 else
1329 printf(" = %08x", dst_sp[j]);
1330 }
1331 printf("\n");
1332 }
1333 }
1334 k++;
1335 printf( "\n" );
1336 }
1337 }
1338
1339
1340
/*
 * Operand pairs for the doubleword extended-divide tests.
 * test_div_extensions() loads column 0 into r14 and column 1 into r15,
 * and reports them as "<col0 followed by 64 zero bits> / <col1>".
 */
signed long long div_dw_tdata[][2] = {
   { 4,                     -4 },
   { 4,                     -3 },
   { 4,                     4 },
   { 4,                     -5 },
   { 3,                     8 },
   { 0x8000000000000000ULL, 0xa },
   { 0x50c,                 -1 },
   { 0x50c,                 -4096 },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0 },
   { 0,                     2 },
   { 0x77,                  0xa3499 }
};
/* Number of operand pairs (rows) in div_dw_tdata. */
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1358
/*
 * Operand pairs for the word extended-divide tests.
 * test_div_extensions() loads column 0 into r14 and column 1 into r15,
 * and reports them as "<col0 followed by 32 zero bits> / <col1>".
 */
unsigned int div_w_tdata[][2] = {
   { 0,          2 },
   { 2,          0 },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5 },
   { 77,         66 },
   { 5,          0xfabc1234 },
};
/* Number of operand pairs (rows) in div_w_tdata. */
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1369
1370 typedef struct div_ext_test
1371 {
1372 test_func_t test_func;
1373 const char *name;
1374 int num_tests;
1375 div_type_t div_type;
1376 precision_type_t precision;
1377 } div_ext_test_t;
1378
1379 static div_ext_test_t div_tests[] = {
1380 #ifdef __powerpc64__
1381 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1382 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1383 #endif
1384 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1385 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1386 { NULL, NULL, 0, 0, 0 }
1387 };
1388
test_div_extensions(void)1389 static void test_div_extensions(void)
1390 {
1391 test_func_t func;
1392 int k;
1393 k = 0;
1394
1395 while ((func = div_tests[k].test_func)) {
1396 int i, repeat = 1;
1397 div_ext_test_t test_group = div_tests[k];
1398 do_dot = False;
1399
1400 again:
1401 for (i = 0; i < test_group.num_tests; i++) {
1402 unsigned int condreg;
1403
1404 if (test_group.div_type == DIV_OE)
1405 do_OE = True;
1406 else
1407 do_OE = False;
1408
1409 if (test_group.precision == DOUBLE_TEST) {
1410 r14 = div_dw_tdata[i][0];
1411 r15 = div_dw_tdata[i][1];
1412 } else {
1413 r14 = div_w_tdata[i][0];
1414 r15 = div_w_tdata[i][1];
1415 }
1416 // execute test insn
1417 (*func)();
1418 condreg = (div_flags & 0xf0000000) >> 28;
1419 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1420 if (test_group.precision == DOUBLE_TEST) {
1421 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1422 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1423 } else {
1424 printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1425 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1426 }
1427 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1428 }
1429 printf("\n");
1430 if (repeat) {
1431 repeat = 0;
1432 do_dot = True;
1433 goto again;
1434 }
1435 k++;
1436 printf( "\n" );
1437 }
1438 }
1439
1440
test_vx_tdivORtsqrt(void)1441 static void test_vx_tdivORtsqrt(void)
1442 {
1443 test_func_t func;
1444 int k, crx;
1445 unsigned int flags;
1446 k = 0;
1447 do_dot = False;
1448 build_special_fargs_table();
1449
1450 while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1451 int idx, i;
1452 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1453 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1454 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1455 Bool two_args = test_group.targs ? True : False;
1456 int stride = dp ? 2 : 4;
1457 int loops = is_scalar ? 1 : stride;
1458 stride = is_scalar ? 1: stride;
1459
1460 for (i = 0; i < test_group.num_tests; i+=stride) {
1461 unsigned int * pv;
1462 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1463
1464 pv = (unsigned int *)&vec_out;
1465 // clear vec_out
1466 for (idx = 0; idx < 4; idx++, pv++)
1467 *pv = 0;
1468
1469 if (dp) {
1470 int j;
1471 unsigned long long * frB_dp;
1472 if (two_args) {
1473 setup_dp_fp_args(&test_group.targs[i], False);
1474 } else {
1475 for (j = 0; j < loops; j++) {
1476 inB = (void *)&spec_fargs[i + j];
1477 // copy double precision FP into vector element i
1478 if (isLE && is_scalar)
1479 vecB_void_ptr += 8;
1480 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1481 }
1482 }
1483 // execute test insn
1484 // Must do set/get of CRs immediately before/after calling the asm func
1485 // to avoid CRs being modified by other instructions.
1486 SET_FPSCR_ZERO;
1487 SET_CR_XER_ZERO;
1488 (*func)();
1489 GET_CR(flags);
1490 // assumes using CR1
1491 crx = (flags & 0x0f000000) >> 24;
1492 if (two_args) {
1493 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1494 } else {
1495 printf("#%d: %s ", i/stride, test_group.name);
1496 for (j = 0; j < loops; j++) {
1497 if (j)
1498 printf("; ");
1499 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1500 printf("%s(%016llx)", test_group.op, *frB_dp);
1501 }
1502 printf( " ? %x (CRx)\n", crx);
1503 }
1504 } else {
1505 int j;
1506 unsigned int * frB_sp;
1507 if (two_args) {
1508 setup_sp_fp_args(&test_group.targs[i], False);
1509 } else {
1510 for (j = 0; j < loops; j++) {
1511 inB = (void *)&spec_sp_fargs[i + j];
1512 // copy single precision FP into vector element i
1513 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1514 }
1515 }
1516 // execute test insn
1517 SET_FPSCR_ZERO;
1518 SET_CR_XER_ZERO;
1519 (*func)();
1520 GET_CR(flags);
1521 crx = (flags & 0x0f000000) >> 24;
1522 // print result
1523 if (two_args) {
1524 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1525 } else {
1526 printf("#%d: %s ", i/stride, test_group.name);
1527 for (j = 0; j < loops; j++) {
1528 if (j)
1529 printf("; ");
1530 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1531 printf("%s(%08x)", test_group.op, *frB_sp);
1532 }
1533 printf( " ? %x (CRx)\n", crx);
1534 }
1535 }
1536 }
1537 k++;
1538 printf( "\n" );
1539 }
1540 }
1541
1542
test_ftsqrt(void)1543 static void test_ftsqrt(void)
1544 {
1545 int i, crx;
1546 unsigned int flags;
1547 unsigned long long * frbp;
1548 build_special_fargs_table();
1549
1550
1551 for (i = 0; i < nb_special_fargs; i++) {
1552 f14 = spec_fargs[i];
1553 frbp = (unsigned long long *)&spec_fargs[i];
1554 SET_FPSCR_ZERO;
1555 SET_CR_XER_ZERO;
1556 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
1557 GET_CR(flags);
1558 crx = (flags & 0x0f000000) >> 24;
1559 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1560 }
1561 printf( "\n" );
1562 }
1563
1564 static void
test_popcntw(void)1565 test_popcntw(void)
1566 {
1567 #ifdef __powerpc64__
1568 uint64_t res;
1569 unsigned long long src = 0x9182736405504536ULL;
1570 r14 = src;
1571 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1572 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1573 #else
1574 uint32_t res;
1575 unsigned int src = 0x9182730E;
1576 r14 = src;
1577 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1578 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1579 #endif
1580 printf( "\n" );
1581 }
1582
1583
1584 static test_table_t
1585 all_tests[] =
1586 {
1587
1588 { &test_vsx_one_fp_arg,
1589 "Test VSX vector and scalar single argument instructions"} ,
1590 { &test_int_to_fp_convert,
1591 "Test VSX vector integer to float conversion instructions" },
1592 { &test_div_extensions,
1593 "Test div extensions" },
1594 { &test_ftsqrt,
1595 "Test ftsqrt instruction" },
1596 { &test_vx_tdivORtsqrt,
1597 "Test vector and scalar tdiv and tsqrt instructions" },
1598 { &test_popcntw,
1599 "Test popcntw instruction" },
1600 { NULL, NULL }
1601 };
1602 #endif // HAS_VSX
1603
/*
 * Program entry point.  When built with HAS_VSX, prints the name of each
 * category in all_tests and runs it, then releases the special-argument
 * tables.  Without HAS_VSX the program does nothing.  Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX
   int i;

   for (i = 0; all_tests[i].test_category != NULL; i++) {
      test_table_t aTest = all_tests[i];

      printf( "%s\n", aTest.name );
      (*aTest.test_category)();
   }

   /* free(NULL) is a no-op, so no guards are needed */
   free(spec_fargs);
   free(spec_sp_fargs);
#endif // HAS_VSX

   return 0;
}
1627