1 /*  Copyright (C) 2011 IBM
2 
3  Author: Maynard Johnson <maynardj@us.ibm.com>
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU General Public License as
7  published by the Free Software Foundation; either version 2 of the
8  License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18  02111-1307, USA.
19 
20  The GNU General Public License is contained in the file COPYING.
21  */
22 
23 #ifdef HAS_VSX
24 
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <malloc.h>
30 #include <altivec.h>
31 
/* HWord_t: unsigned integer type selected at compile time -- 64 bits wide
 * when __powerpc64__ is defined, 32 bits wide otherwise.
 */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* isLE: 1 when building for VGP_ppc64le_linux, 0 for every other target. */
#if defined(VGP_ppc64le_linux)
#define isLE 1
#else
#define isLE 0
#endif
43 
44 register HWord_t r14 __asm__ ("r14");
45 register HWord_t r15 __asm__ ("r15");
46 register HWord_t r16 __asm__ ("r16");
47 register HWord_t r17 __asm__ ("r17");
48 register double f14 __asm__ ("fr14");
49 register double f15 __asm__ ("fr15");
50 register double f16 __asm__ ("fr16");
51 register double f17 __asm__ ("fr17");
52 
53 static volatile unsigned int cond_reg;
54 
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* ---- Condition register (CR) ------------------------------------- */

/* Load CR from _arg with mtcr.
 * NOTE: the expansion already ends in ';'.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Read CR into _lval with mfcr (no trailing ';' in the expansion). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

#define SET_CR_ZERO \
      SET_CR(0)

/* ---- Fixed-point exception register (XER) ------------------------ */

/* Load XER from _arg with mtxer.
 * NOTE: the expansion already ends in ';'.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read XER into _lval with mfxer (no trailing ';' in the expansion). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

#define SET_XER_ZERO \
      SET_XER(0)

/* ---- CR and XER together ----------------------------------------- */

/* Read CR first, then XER, each into its own lvalue. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Zero CR first, then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* ---- Floating-point status and control register (FPSCR) ---------- */

/* Write a 0.0 double to FPSCR via mtfsf with field mask 0xFF. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
85 
86 
/* Signature shared by the test routines: no arguments, no return value. */
typedef void (*test_func_t)(void);

/* Short aliases for the test-descriptor structs; the struct bodies are
 * not defined at this point in the file.
 */
typedef struct ldst_test        ldst_test_t;
typedef struct vsx_logic_test   logic_test_t;
typedef struct xs_conv_test     xs_conv_test_t;
typedef struct p7_fp_test       fp_test_t;
typedef struct vx_fp_test       vx_fp_test_t;
typedef struct vsx_move_test    move_test_t;
typedef struct vsx_permute_test permute_test_t;
typedef struct test_table       test_table_t;
96 
/* Number of entries stored in fargs; stays 0 until the table is built. */
static int nb_fargs = 0;
/* Heap-allocated table of double-precision operands (NULL until built). */
static double *fargs = NULL;
99 
/* The functions below, which construct a table of floating-point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
103 
/* Debug tracing for the argument-table builders.  When DEBUG_ARGS_BUILD
 * is defined the message goes to stderr; otherwise the macro expands to
 * an empty statement and its arguments are not evaluated.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit double-precision bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least sizeof(uint64_t) writable
 *         bytes
 *   s     sign, shifted into bit 63
 *   _exp  biased exponent, shifted into bits 62..52
 *   mant  fraction, OR-ed into the low bits
 *
 * The three fields are OR-ed together without masking, so the caller is
 * responsible for keeping each value inside its field width.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits =
      ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Copy the raw bytes rather than storing through a cast pointer, so
    * the destination may later be read as a double without violating
    * the aliasing rules.
    */
   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* Casts make the arguments match %x / %llx whatever the
       * underlying types of uint16_t and uint64_t are on this target.
       */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, (unsigned int)_exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
120 
build_fargs_table(void)121 static void build_fargs_table(void)
122 /*
123  * Double precision:
124  * Sign goes from zero to one               (1 bit)
125  * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
126  * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
127  * + special values:
128  * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
129  * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
130  * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
131  * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
132  * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
133  * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
134  * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
135  * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
136  * (8 values)
137  *
138  * Single precision
139  * Sign:     1 bit
140  * Exponent: 8 bits
141  * Mantissa: 23 bits
142  * +0.0      : 0 0x00 0x000000 => 0x00000000
143  * -0.0      : 1 0x00 0x000000 => 0x80000000
144  * +infinity : 0 0xFF 0x000000 => 0x7F800000
145  * -infinity : 1 0xFF 0x000000 => 0xFF800000
146  * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
147  * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
148  * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
149  * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
150 */
151 {
152    uint64_t mant;
153    uint16_t _exp, e1;
154    int s;
155    int i=0;
156 
157    if (nb_fargs)
158       return;
159 
160    fargs = malloc( 16 * sizeof(double) );
161    for (s = 0; s < 2; s++) {
162       for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) {
163          if (e1 >= 0x400)
164             e1 = 0x3fe;
165          _exp = e1;
166          for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
167          /* Add 'random' bits */
168          mant = ((mant + 0x4A6) << 29) + 0x359) {
169             register_farg( &fargs[i++], s, _exp, mant );
170          }
171          if (e1 == 0x3fe)
172             break;
173       }
174    }
175    // add a few smaller values to fargs . . .
176    s = 0;
177    _exp = 0x002;
178    mant = 0x0000000000b01ULL;
179    register_farg(&fargs[i++], s, _exp, mant);
180 
181    _exp = 0x000;
182    mant = 0x00000203f0b3dULL;
183    register_farg(&fargs[i++], s, _exp, mant);
184 
185    mant = 0x00000005a203dULL;
186    register_farg(&fargs[i++], s, _exp, mant);
187 
188    s = 1;
189    _exp = 0x002;
190    mant = 0x0000000000b01ULL;
191    register_farg(&fargs[i++], s, _exp, mant);
192 
193    _exp = 0x000;
194    mant = 0x00000203f0b3dULL;
195    register_farg(&fargs[i++], s, _exp, mant);
196 
197    nb_fargs = i;
198 }
199 
200 
/* One row of a floating-point test table.
 * NOTE(review): fra_idx and frb_idx are presumably indices into an
 * operand table defined elsewhere, and cr_flags the expected
 * condition-register flags -- confirm against the code that consumes
 * these tables.
 */
struct fp_test_args {
   int fra_idx;   /* index selecting the first operand  */
   int frb_idx;   /* index selecting the second operand */
   int cr_flags;  /* flag value associated with the pair */
};

typedef struct fp_test_args fp_test_args_t;
206 
207 
208 fp_test_args_t ftdiv_tests[] = {
209                               {0, 1, 0x8},
210                               {9, 1, 0xa},
211                               {1, 12, 0xa},
212                               {0, 2, 0xa},
213                               {1, 3, 0xa},
214                               {3, 0, 0xa},
215                               {0, 3, 0xa},
216                               {4, 0, 0xa},
217                               {7, 1, 0xe},
218                               {8, 1, 0xe},
219                               {1, 7, 0xe},
220                               {0, 13, 0xe},
221                               {5, 5, 0xe},
222                               {5, 6, 0xe},
223 };
224 
225 fp_test_args_t xscmpX_tests[] = {
226                                    {8, 8, 0x2},
227                                    {8, 14, 0x8},
228                                    {8, 6, 0x8},
229                                    {8, 5, 0x8},
230                                    {8, 4, 0x8},
231                                    {8, 7, 0x8},
232                                    {8, 9, 0x1},
233                                    {8, 11, 0x1},
234                                    {14, 8, 0x4},
235                                    {14, 14, 0x2},
236                                    {14, 6, 0x8},
237                                    {14, 5, 0x8},
238                                    {14, 4, 0x8},
239                                    {14, 7, 0x8},
240                                    {14, 9, 0x1},
241                                    {14, 11, 0x1},
242                                    {6, 8, 0x4},
243                                    {6, 14, 0x4},
244                                    {6, 6, 0x2},
245                                    {6, 5, 0x2},
246                                    {6, 4, 0x8},
247                                    {6, 7, 0x8},
248                                    {6, 9, 0x1},
249                                    {6, 11, 0x1},
250                                    {5, 8, 0x4},
251                                    {5, 14, 0x4},
252                                    {5, 6, 0x2},
253                                    {5, 5, 0x2},
254                                    {5, 4, 0x8},
255                                    {5, 7, 0x8},
256                                    {5, 9, 0x1},
257                                    {5, 11, 0x1},
258                                    {4, 8, 0x4},
259                                    {4, 14, 0x4},
260                                    {4, 6, 0x4},
261                                    {4, 5, 0x4},
262                                    {4, 1, 0x8},
263                                    {4, 7, 0x8},
264                                    {4, 9, 0x1},
265                                    {4, 11, 0x1},
266                                    {7, 8, 0x4},
267                                    {7, 14, 0x4},
268                                    {7, 6, 0x4},
269                                    {7, 5, 0x4},
270                                    {7, 4, 0x4},
271                                    {7, 7, 0x2},
272                                    {7, 9, 0x1},
273                                    {7, 11, 0x1},
274                                    {10, 8, 0x1},
275                                    {10, 14, 0x1},
276                                    {10, 6, 0x1},
277                                    {10, 5, 0x1},
278                                    {10, 4, 0x1},
279                                    {10, 7, 0x1},
280                                    {10, 9, 0x1},
281                                    {10, 11, 0x1},
282                                    {12, 8, 0x1},
283                                    {12, 14, 0x1},
284                                    {12, 6, 0x1},
285                                    {12, 5, 0x1},
286                                    {12, 4, 0x1},
287                                    {12, 7, 0x1},
288                                    {12, 9, 0x1},
289                                    {12, 11, 0x1},
290 };
291 
292 fp_test_args_t xsadddp_tests[] = {
293                                    {8, 8, 0x0},
294                                    {8, 14, 0x0},
295                                    {8, 6, 0x0},
296                                    {8, 5, 0x0},
297                                    {8, 4, 0x0},
298                                    {8, 7, 0x0},
299                                    {8, 9, 0x0},
300                                    {8, 11, 0x0},
301                                    {14, 8, 0x0},
302                                    {14, 14, 0x0},
303                                    {14, 6, 0x0},
304                                    {14, 5, 0x0},
305                                    {14, 4, 0x0},
306                                    {14, 7, 0x0},
307                                    {14, 9, 0x0},
308                                    {14, 11, 0x0},
309                                    {6, 8, 0x0},
310                                    {6, 14, 0x0},
311                                    {6, 6, 0x0},
312                                    {6, 5, 0x0},
313                                    {6, 4, 0x0},
314                                    {6, 7, 0x0},
315                                    {6, 9, 0x0},
316                                    {6, 11, 0x0},
317                                    {5, 8, 0x0},
318                                    {5, 14, 0x0},
319                                    {5, 6, 0x0},
320                                    {5, 5, 0x0},
321                                    {5, 4, 0x0},
322                                    {5, 7, 0x0},
323                                    {5, 9, 0x0},
324                                    {5, 11, 0x0},
325                                    {4, 8, 0x0},
326                                    {4, 14, 0x0},
327                                    {4, 6, 0x0},
328                                    {4, 5, 0x0},
329                                    {4, 1, 0x0},
330                                    {4, 7, 0x0},
331                                    {4, 9, 0x0},
332                                    {4, 11, 0x0},
333                                    {7, 8, 0x0},
334                                    {7, 14, 0x0},
335                                    {7, 6, 0x0},
336                                    {7, 5, 0x0},
337                                    {7, 4, 0x0},
338                                    {7, 7, 0x0},
339                                    {7, 9, 0x0},
340                                    {7, 11, 0x0},
341                                    {10, 8, 0x0},
342                                    {10, 14, 0x0},
343                                    {10, 6, 0x0},
344                                    {10, 5, 0x0},
345                                    {10, 4, 0x0},
346                                    {10, 7, 0x0},
347                                    {10, 9, 0x0},
348                                    {10, 11, 0x0},
349                                    {12, 8, 0x0},
350                                    {12, 14, 0x0},
351                                    {12, 6, 0x0},
352                                    {12, 5, 0x0},
353                                    {12, 4, 0x0},
354                                    {12, 7, 0x0},
355                                    {12, 9, 0x0},
356                                    {12, 11, 0x0},
357 };
358 
359 fp_test_args_t xsdivdp_tests[] = {
360                                    {8, 8, 0x0},
361                                    {8, 14, 0x0},
362                                    {8, 6, 0x0},
363                                    {8, 5, 0x0},
364                                    {8, 4, 0x0},
365                                    {8, 7, 0x0},
366                                    {8, 9, 0x0},
367                                    {8, 11, 0x0},
368                                    {14, 8, 0x0},
369                                    {14, 14, 0x0},
370                                    {14, 6, 0x0},
371                                    {14, 5, 0x0},
372                                    {14, 4, 0x0},
373                                    {14, 7, 0x0},
374                                    {14, 9, 0x0},
375                                    {14, 11, 0x0},
376                                    {6, 8, 0x0},
377                                    {6, 14, 0x0},
378                                    {6, 6, 0x0},
379                                    {6, 5, 0x0},
380                                    {6, 4, 0x0},
381                                    {6, 7, 0x0},
382                                    {6, 9, 0x0},
383                                    {6, 11, 0x0},
384                                    {5, 8, 0x0},
385                                    {5, 14, 0x0},
386                                    {5, 6, 0x0},
387                                    {5, 5, 0x0},
388                                    {5, 4, 0x0},
389                                    {5, 7, 0x0},
390                                    {5, 9, 0x0},
391                                    {5, 11, 0x0},
392                                    {4, 8, 0x0},
393                                    {4, 14, 0x0},
394                                    {4, 6, 0x0},
395                                    {4, 5, 0x0},
396                                    {4, 1, 0x0},
397                                    {4, 7, 0x0},
398                                    {4, 9, 0x0},
399                                    {4, 11, 0x0},
400                                    {7, 8, 0x0},
401                                    {7, 14, 0x0},
402                                    {7, 6, 0x0},
403                                    {7, 5, 0x0},
404                                    {7, 4, 0x0},
405                                    {7, 7, 0x0},
406                                    {7, 9, 0x0},
407                                    {7, 11, 0x0},
408                                    {10, 8, 0x0},
409                                    {10, 14, 0x0},
410                                    {10, 6, 0x0},
411                                    {10, 5, 0x0},
412                                    {10, 4, 0x0},
413                                    {10, 7, 0x0},
414                                    {10, 9, 0x0},
415                                    {10, 11, 0x0},
416                                    {12, 8, 0x0},
417                                    {12, 14, 0x0},
418                                    {12, 6, 0x0},
419                                    {12, 5, 0x0},
420                                    {12, 4, 0x0},
421                                    {12, 7, 0x0},
422                                    {12, 9, 0x0},
423                                    {12, 11, 0x0},
424 };
425 
426 fp_test_args_t xsmaddXdp_tests[] = {
427                                    {8, 8, 0x0},
428                                    {8, 14, 0x0},
429                                    {8, 6, 0x0},
430                                    {8, 5, 0x0},
431                                    {8, 4, 0x0},
432                                    {8, 7, 0x0},
433                                    {8, 9, 0x0},
434                                    {8, 11, 0x0},
435                                    {14, 8, 0x0},
436                                    {14, 14, 0x0},
437                                    {14, 6, 0x0},
438                                    {14, 5, 0x0},
439                                    {14, 4, 0x0},
440                                    {14, 7, 0x0},
441                                    {14, 9, 0x0},
442                                    {14, 11, 0x0},
443                                    {6, 8, 0x0},
444                                    {6, 14, 0x0},
445                                    {6, 6, 0x0},
446                                    {6, 5, 0x0},
447                                    {6, 4, 0x0},
448                                    {6, 7, 0x0},
449                                    {6, 9, 0x0},
450                                    {6, 11, 0x0},
451                                    {5, 8, 0x0},
452                                    {5, 14, 0x0},
453                                    {5, 6, 0x0},
454                                    {5, 5, 0x0},
455                                    {5, 4, 0x0},
456                                    {5, 7, 0x0},
457                                    {5, 9, 0x0},
458                                    {5, 11, 0x0},
459                                    {4, 8, 0x0},
460                                    {4, 14, 0x0},
461                                    {4, 6, 0x0},
462                                    {4, 5, 0x0},
463                                    {4, 1, 0x0},
464                                    {4, 7, 0x0},
465                                    {4, 9, 0x0},
466                                    {4, 11, 0x0},
467                                    {7, 8, 0x0},
468                                    {7, 14, 0x0},
469                                    {7, 6, 0x0},
470                                    {7, 5, 0x0},
471                                    {7, 4, 0x0},
472                                    {7, 7, 0x0},
473                                    {7, 9, 0x0},
474                                    {7, 11, 0x0},
475                                    {10, 8, 0x0},
476                                    {10, 14, 0x0},
477                                    {10, 6, 0x0},
478                                    {10, 5, 0x0},
479                                    {10, 4, 0x0},
480                                    {10, 7, 0x0},
481                                    {10, 9, 0x0},
482                                    {10, 11, 0x0},
483                                    {12, 8, 0x0},
484                                    {12, 14, 0x0},
485                                    {12, 6, 0x0},
486                                    {12, 5, 0x0},
487                                    {12, 4, 0x0},
488                                    {12, 7, 0x0},
489                                    {12, 9, 0x0},
490                                    {12, 11, 0x0},
491 };
492 
493 fp_test_args_t xsmsubXdp_tests[] = {
494                                    {8, 8, 0x0},
495                                    {8, 14, 0x0},
496                                    {8, 6, 0x0},
497                                    {8, 5, 0x0},
498                                    {8, 4, 0x0},
499                                    {8, 7, 0x0},
500                                    {8, 9, 0x0},
501                                    {8, 11, 0x0},
502                                    {14, 8, 0x0},
503                                    {14, 14, 0x0},
504                                    {14, 6, 0x0},
505                                    {14, 5, 0x0},
506                                    {14, 4, 0x0},
507                                    {14, 7, 0x0},
508                                    {14, 9, 0x0},
509                                    {14, 11, 0x0},
510                                    {6, 8, 0x0},
511                                    {6, 14, 0x0},
512                                    {6, 6, 0x0},
513                                    {6, 5, 0x0},
514                                    {6, 4, 0x0},
515                                    {6, 7, 0x0},
516                                    {6, 9, 0x0},
517                                    {6, 11, 0x0},
518                                    {5, 8, 0x0},
519                                    {5, 14, 0x0},
520                                    {5, 6, 0x0},
521                                    {5, 5, 0x0},
522                                    {5, 4, 0x0},
523                                    {5, 7, 0x0},
524                                    {5, 9, 0x0},
525                                    {5, 11, 0x0},
526                                    {4, 8, 0x0},
527                                    {4, 14, 0x0},
528                                    {4, 6, 0x0},
529                                    {4, 5, 0x0},
530                                    {4, 1, 0x0},
531                                    {4, 7, 0x0},
532                                    {4, 9, 0x0},
533                                    {4, 11, 0x0},
534                                    {7, 8, 0x0},
535                                    {7, 14, 0x0},
536                                    {7, 6, 0x0},
537                                    {7, 5, 0x0},
538                                    {7, 4, 0x0},
539                                    {7, 7, 0x0},
540                                    {7, 9, 0x0},
541                                    {7, 11, 0x0},
542                                    {10, 8, 0x0},
543                                    {10, 14, 0x0},
544                                    {10, 6, 0x0},
545                                    {10, 5, 0x0},
546                                    {10, 4, 0x0},
547                                    {10, 7, 0x0},
548                                    {10, 9, 0x0},
549                                    {10, 11, 0x0},
550                                    {12, 8, 0x0},
551                                    {12, 14, 0x0},
552                                    {12, 6, 0x0},
553                                    {12, 5, 0x0},
554                                    {12, 4, 0x0},
555                                    {12, 7, 0x0},
556                                    {12, 9, 0x0},
557                                    {12, 11, 0x0},
558 };
559 
560 fp_test_args_t xsnmaddXdp_tests[] = {
561                                      {8, 8, 0x0},
562                                      {8, 14, 0x0},
563                                      {8, 6, 0x0},
564                                      {8, 5, 0x0},
565                                      {8, 4, 0x0},
566                                      {8, 7, 0x0},
567                                      {8, 9, 0x0},
568                                      {8, 11, 0x0},
569                                      {14, 8, 0x0},
570                                      {14, 14, 0x0},
571                                      {14, 6, 0x0},
572                                      {14, 5, 0x0},
573                                      {14, 4, 0x0},
574                                      {14, 7, 0x0},
575                                      {14, 9, 0x0},
576                                      {14, 11, 0x0},
577                                      {6, 8, 0x0},
578                                      {6, 14, 0x0},
579                                      {6, 6, 0x0},
580                                      {6, 5, 0x0},
581                                      {6, 4, 0x0},
582                                      {6, 7, 0x0},
583                                      {6, 9, 0x0},
584                                      {6, 11, 0x0},
585                                      {5, 8, 0x0},
586                                      {5, 14, 0x0},
587                                      {5, 6, 0x0},
588                                      {5, 5, 0x0},
589                                      {5, 4, 0x0},
590                                      {5, 7, 0x0},
591                                      {5, 9, 0x0},
592                                      {5, 11, 0x0},
593                                      {4, 8, 0x0},
594                                      {4, 14, 0x0},
595                                      {4, 6, 0x0},
596                                      {4, 5, 0x0},
597                                      {4, 1, 0x0},
598                                      {4, 7, 0x0},
599                                      {4, 9, 0x0},
600                                      {4, 11, 0x0},
601                                      {7, 8, 0x0},
602                                      {7, 14, 0x0},
603                                      {7, 6, 0x0},
604                                      {7, 5, 0x0},
605                                      {7, 4, 0x0},
606                                      {7, 7, 0x0},
607                                      {7, 9, 0x0},
608                                      {7, 11, 0x0},
609                                      {10, 8, 0x0},
610                                      {10, 14, 0x0},
611                                      {10, 6, 0x0},
612                                      {10, 5, 0x0},
613                                      {10, 4, 0x0},
614                                      {10, 7, 0x0},
615                                      {10, 9, 0x0},
616                                      {10, 11, 0x0},
617                                      {12, 8, 0x0},
618                                      {12, 14, 0x0},
619                                      {12, 6, 0x0},
620                                      {12, 5, 0x0},
621                                      {12, 4, 0x0},
622                                      {12, 7, 0x0},
623                                      {12, 9, 0x0},
624                                      {12, 11, 0x0},
625 };
626 
627 fp_test_args_t xsmuldp_tests[] = {
628                                   {8, 8, 0x0},
629                                   {8, 14, 0x0},
630                                   {8, 6, 0x0},
631                                   {8, 5, 0x0},
632                                   {8, 4, 0x0},
633                                   {8, 7, 0x0},
634                                   {8, 9, 0x0},
635                                   {8, 11, 0x0},
636                                   {14, 8, 0x0},
637                                   {14, 14, 0x0},
638                                   {14, 6, 0x0},
639                                   {14, 5, 0x0},
640                                   {14, 4, 0x0},
641                                   {14, 7, 0x0},
642                                   {14, 9, 0x0},
643                                   {14, 11, 0x0},
644                                   {6, 8, 0x0},
645                                   {6, 14, 0x0},
646                                   {6, 6, 0x0},
647                                   {6, 5, 0x0},
648                                   {6, 4, 0x0},
649                                   {6, 7, 0x0},
650                                   {6, 9, 0x0},
651                                   {6, 11, 0x0},
652                                   {5, 8, 0x0},
653                                   {5, 14, 0x0},
654                                   {5, 6, 0x0},
655                                   {5, 5, 0x0},
656                                   {5, 4, 0x0},
657                                   {5, 7, 0x0},
658                                   {5, 9, 0x0},
659                                   {5, 11, 0x0},
660                                   {4, 8, 0x0},
661                                   {4, 14, 0x0},
662                                   {4, 6, 0x0},
663                                   {4, 5, 0x0},
664                                   {4, 1, 0x0},
665                                   {4, 7, 0x0},
666                                   {4, 9, 0x0},
667                                   {4, 11, 0x0},
668                                   {7, 8, 0x0},
669                                   {7, 14, 0x0},
670                                   {7, 6, 0x0},
671                                   {7, 5, 0x0},
672                                   {7, 4, 0x0},
673                                   {7, 7, 0x0},
674                                   {7, 9, 0x0},
675                                   {7, 11, 0x0},
676                                   {10, 8, 0x0},
677                                   {10, 14, 0x0},
678                                   {10, 6, 0x0},
679                                   {10, 5, 0x0},
680                                   {10, 4, 0x0},
681                                   {10, 7, 0x0},
682                                   {10, 9, 0x0},
683                                   {10, 11, 0x0},
684                                   {12, 8, 0x0},
685                                   {12, 14, 0x0},
686                                   {12, 6, 0x0},
687                                   {12, 5, 0x0},
688                                   {12, 4, 0x0},
689                                   {12, 7, 0x0},
690                                   {12, 9, 0x0},
691                                   {12, 11, 0x0},
692 };
693 
694 fp_test_args_t xssubdp_tests[] = {
695                                   {8, 8, 0x0},
696                                   {8, 14, 0x0},
697                                   {8, 6, 0x0},
698                                   {8, 5, 0x0},
699                                   {8, 4, 0x0},
700                                   {8, 7, 0x0},
701                                   {8, 9, 0x0},
702                                   {8, 11, 0x0},
703                                   {14, 8, 0x0},
704                                   {14, 14, 0x0},
705                                   {14, 6, 0x0},
706                                   {14, 5, 0x0},
707                                   {14, 4, 0x0},
708                                   {14, 7, 0x0},
709                                   {14, 9, 0x0},
710                                   {14, 11, 0x0},
711                                   {6, 8, 0x0},
712                                   {6, 14, 0x0},
713                                   {6, 6, 0x0},
714                                   {6, 5, 0x0},
715                                   {6, 4, 0x0},
716                                   {6, 7, 0x0},
717                                   {6, 9, 0x0},
718                                   {6, 11, 0x0},
719                                   {5, 8, 0x0},
720                                   {5, 14, 0x0},
721                                   {5, 6, 0x0},
722                                   {5, 5, 0x0},
723                                   {5, 4, 0x0},
724                                   {5, 7, 0x0},
725                                   {5, 9, 0x0},
726                                   {5, 11, 0x0},
727                                   {4, 8, 0x0},
728                                   {4, 14, 0x0},
729                                   {4, 6, 0x0},
730                                   {4, 5, 0x0},
731                                   {4, 1, 0x0},
732                                   {4, 7, 0x0},
733                                   {4, 9, 0x0},
734                                   {4, 11, 0x0},
735                                   {7, 8, 0x0},
736                                   {7, 14, 0x0},
737                                   {7, 6, 0x0},
738                                   {7, 5, 0x0},
739                                   {7, 4, 0x0},
740                                   {7, 7, 0x0},
741                                   {7, 9, 0x0},
742                                   {7, 11, 0x0},
743                                   {10, 8, 0x0},
744                                   {10, 14, 0x0},
745                                   {10, 6, 0x0},
746                                   {10, 5, 0x0},
747                                   {10, 4, 0x0},
748                                   {10, 7, 0x0},
749                                   {10, 9, 0x0},
750                                   {10, 11, 0x0},
751                                   {12, 8, 0x0},
752                                   {12, 14, 0x0},
753                                   {12, 6, 0x0},
754                                   {12, 5, 0x0},
755                                   {12, 4, 0x0},
756                                   {12, 7, 0x0},
757                                   {12, 9, 0x0},
758                                   {12, 11, 0x0},
759 };
760 
761 
762 
/* Number of entries filled in spec_fargs; set by build_special_fargs_table(). */
static int nb_special_fargs;
/* Heap-allocated table of special double values; NULL until
 * build_special_fargs_table() has run. */
static double * spec_fargs;
765 
build_special_fargs_table(void)766 static void build_special_fargs_table(void)
767 {
768    /* The special floating point values created below are for
769     * use in the ftdiv tests for setting the fe_flag and fg_flag,
770     * but they can also be used for other tests (e.g., xscmpudp).
771     *
772     * Note that fl_flag is 'always '1' on ppc64 Linux.
773     *
774   Entry  Sign Exp   fraction                  Special value
775    0      0   3fd   0x8000000000000ULL         Positive finite number
776    1      0   404   0xf000000000000ULL         ...
777    2      0   001   0x8000000b77501ULL         ...
778    3      0   7fe   0x800000000051bULL         ...
779    4      0   012   0x3214569900000ULL         ...
780    5      0   000   0x0000000000000ULL         +0.0 (+zero)
781    6      1   000   0x0000000000000ULL         -0.0 (-zero)
782    7      0   7ff   0x0000000000000ULL         +infinity
783    8      1   7ff   0x0000000000000ULL         -infinity
784    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
785    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
786    11     0   7ff   0x8000000000000ULL         +QNaN
787    12     1   7ff   0x8000000000000ULL         -QNaN
788    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
789    14     1   40d   0x0650f5a07b353ULL         Negative finite number
790     */
791 
792    uint64_t mant;
793    uint16_t _exp;
794    int s;
795    int i = 0;
796 
797    if (spec_fargs)
798       return;
799 
800    spec_fargs = malloc( 16 * sizeof(double) );
801 
802    // #0
803    s = 0;
804    _exp = 0x3fd;
805    mant = 0x8000000000000ULL;
806    register_farg(&spec_fargs[i++], s, _exp, mant);
807 
808    // #1
809    s = 0;
810    _exp = 0x404;
811    mant = 0xf000000000000ULL;
812    register_farg(&spec_fargs[i++], s, _exp, mant);
813 
814    /* None of the ftdiv tests succeed.
815     * FRA = value #0; FRB = value #1
816     * ea_ = -2; e_b = 5
817     * fl_flag || fg_flag || fe_flag = 100
818     */
819 
820    /*************************************************
821     *     fe_flag tests
822     *
823     *************************************************/
824 
825    /* fe_flag <- 1 if FRA is a NaN
826     * FRA = value #9; FRB = value #1
827     * e_a = 1024; e_b = 5
828     * fl_flag || fg_flag || fe_flag = 101
829     */
830 
831    /* fe_flag <- 1 if FRB is a NaN
832     * FRA = value #1; FRB = value #12
833     * e_a = 5; e_b = 1024
834     * fl_flag || fg_flag || fe_flag = 101
835     */
836 
837    /* fe_flag <- 1 if e_b <= -1022
838     * FRA = value #0; FRB = value #2
839     * e_a = -2; e_b = -1022
840     * fl_flag || fg_flag || fe_flag = 101
841     *
842     */
843    // #2
844    s = 0;
845    _exp = 0x001;
846    mant = 0x8000000b77501ULL;
847    register_farg(&spec_fargs[i++], s, _exp, mant);
848 
849    /* fe_flag <- 1 if e_b >= 1021
850     * FRA = value #1; FRB = value #3
851     * e_a = 5; e_b = 1023
852     * fl_flag || fg_flag || fe_flag = 101
853     */
854    // #3
855    s = 0;
856    _exp = 0x7fe;
857    mant = 0x800000000051bULL;
858    register_farg(&spec_fargs[i++], s, _exp, mant);
859 
860    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
861     * Let FRA = value #3 and FRB be value #0.
862     * e_a = 1023; e_b = -2
863     * fl_flag || fg_flag || fe_flag = 101
864     */
865 
866    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
867     * Let FRA = value #0 above and FRB be value #3 above
868     * e_a = -2; e_b = 1023
869     * fl_flag || fg_flag || fe_flag = 101
870     */
871 
872    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
873     * Let FRA = value #4 and FRB be value #0
874     * e_a = -1005; e_b = -2
875     * fl_flag || fg_flag || fe_flag = 101
876    */
877    // #4
878    s = 0;
879    _exp = 0x012;
880    mant = 0x3214569900000ULL;
881    register_farg(&spec_fargs[i++], s, _exp, mant);
882 
883    /*************************************************
884     *     fg_flag tests
885     *
886     *************************************************/
887    /* fg_flag <- 1 if FRA is an Infinity
888     * NOTE: FRA = Inf also sets fe_flag
889     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
890     * Test 1:
891     *   Let FRA be value #7 and FRB be value #1
892     *   e_a = 1024; e_b = 5
893     *   fl_flag || fg_flag || fe_flag = 111
894     *
895     * Test 2:
896     *   Let FRA be value #8 and FRB be value #1
897     *   e_a = 1024; e_b = 5
898     *   fl_flag || fg_flag || fe_flag = 111
899     *
900     */
901 
902    /* fg_flag <- 1 if FRB is an Infinity
903     * NOTE: FRB = Inf also sets fe_flag
904     * Let FRA be value #1 and FRB be value #7
905     * e_a = 5; e_b = 1024
906     * fl_flag || fg_flag || fe_flag = 111
907     */
908 
909    /* fg_flag <- 1 if FRB is denormalized
910     * NOTE: e_b < -1022 ==> fe_flag <- 1
911     * Let FRA be value #0 and FRB be value #13
912     * e_a = -2; e_b = -1023
913     * fl_flag || fg_flag || fe_flag = 111
914     */
915 
916    /* fg_flag <- 1 if FRB is +zero
917     * NOTE: FRA = Inf also sets fe_flag
918     * Let FRA = val #5; FRB = val #5
919     * ea_ = -1023; e_b = -1023
920     * fl_flag || fg_flag || fe_flag = 111
921     */
922 
923    /* fg_flag <- 1 if FRB is -zero
924     * NOTE: FRA = Inf also sets fe_flag
925     * Let FRA = val #5; FRB = val #6
926     * ea_ = -1023; e_b = -1023
927     * fl_flag || fg_flag || fe_flag = 111
928     */
929 
930    /* Special values */
931    /* +0.0      : 0 0x000 0x0000000000000 */
932    // #5
933    s = 0;
934    _exp = 0x000;
935    mant = 0x0000000000000ULL;
936    register_farg(&spec_fargs[i++], s, _exp, mant);
937 
938    /* -0.0      : 1 0x000 0x0000000000000 */
939    // #6
940    s = 1;
941    _exp = 0x000;
942    mant = 0x0000000000000ULL;
943    register_farg(&spec_fargs[i++], s, _exp, mant);
944 
945    /* +infinity : 0 0x7FF 0x0000000000000  */
946    // #7
947    s = 0;
948    _exp = 0x7FF;
949    mant = 0x0000000000000ULL;
950    register_farg(&spec_fargs[i++], s, _exp, mant);
951 
952    /* -infinity : 1 0x7FF 0x0000000000000 */
953    // #8
954    s = 1;
955    _exp = 0x7FF;
956    mant = 0x0000000000000ULL;
957    register_farg(&spec_fargs[i++], s, _exp, mant);
958 
959    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
960    // #9
961    s = 0;
962    _exp = 0x7FF;
963    mant = 0x7FFFFFFFFFFFFULL;
964    register_farg(&spec_fargs[i++], s, _exp, mant);
965 
966    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
967    // #10
968    s = 1;
969    _exp = 0x7FF;
970    mant = 0x7FFFFFFFFFFFFULL;
971    register_farg(&spec_fargs[i++], s, _exp, mant);
972 
973    /* +QNaN     : 0 0x7FF 0x8000000000000 */
974    // #11
975    s = 0;
976    _exp = 0x7FF;
977    mant = 0x8000000000000ULL;
978    register_farg(&spec_fargs[i++], s, _exp, mant);
979 
980    /* -QNaN     : 1 0x7FF 0x8000000000000 */
981    // #12
982    s = 1;
983    _exp = 0x7FF;
984    mant = 0x8000000000000ULL;
985    register_farg(&spec_fargs[i++], s, _exp, mant);
986 
987    /* denormalized value */
988    // #13
989    s = 1;
990    _exp = 0x000;
991    mant = 0x8340000078000ULL;
992    register_farg(&spec_fargs[i++], s, _exp, mant);
993 
994    /* Negative finite number */
995    // #14
996    s = 1;
997    _exp = 0x40d;
998    mant = 0x0650f5a07b353ULL;
999    register_farg(&spec_fargs[i++], s, _exp, mant);
1000 
1001    nb_special_fargs = i;
1002 }
1003 
1004 
/* Pairs a test function with a printable name. */
struct test_table
{
   test_func_t test_category;   /* function to call */
   char * name;                 /* name associated with test_category */
};
1010 
/* Descriptor for one floating point test: the function to run, its
 * name, and the precision of the result it produces. */
struct p7_fp_test
{
   test_func_t test_func;
   const char *name;
   int single;  // 1=single precision result; 0=double precision result
};
1017 
/* Category of a VX floating point test group.  test_vx_fp_ops() switches
 * on this value to pick the printed test name and how each test is run.
 */
typedef enum {
   VX_FP_CMP   = 0,   /* compare tests ("xscmp" name prefix) */
   VX_FP_SMA   = 1,   /* "xsmadd" tests */
   VX_FP_SMS   = 2,   /* "xsmsub" tests */
   VX_FP_SNMA  = 3,   /* "xsnmadd" tests */
   VX_FP_OTHER = 4    /* anything else; the group's own name is used */
} vx_fp_test_type;
1025 
/* Descriptor for one group of VX floating point tests. */
struct vx_fp_test
{
   test_func_t test_func;      /* function that executes the instruction */
   const char *name;           /* group name */
   fp_test_args_t * targs;     /* per-test arguments, num_tests entries */
   int num_tests;              /* number of entries in targs */
   vx_fp_test_type test_type;  /* category used by the driver's switch */
};
1034 
/* Descriptor for one scalar conversion test: function, name and test count. */
struct xs_conv_test
{
   test_func_t test_func;
   const char *name;
   int num_tests;
};
1041 
/* Kind of VSX load/store test; numbering starts at 1. */
typedef enum {
   VSX_LOAD       = 1,
   VSX_LOAD_SPLAT = 2,
   VSX_STORE      = 3
} vsx_ldst_type;
1047 
/* Descriptor for one VSX load/store test. */
struct ldst_test
{
   test_func_t test_func;      /* function that executes the instruction */
   const char *name;           /* instruction name */
   void * base_addr;           /* base of the buffer the test works on */
   uint32_t offset;            /* offset applied to base_addr -- presumably in bytes; confirm in the driver */
   int num_words_to_process;   /* word count for the test -- see the driver for exact use */
   vsx_ldst_type type;         /* load, load-and-splat, or store */
};
1057 
/* Logical operation selected by a VSX logic test; numbering starts at 1. */
typedef enum {
   VSX_AND  = 1,
   VSX_XOR  = 2,
   VSX_ANDC = 3,
   VSX_OR   = 4,
   VSX_NOR  = 5
} vsx_log_op;
1065 
/* Descriptor for one VSX logical-operation test. */
struct vsx_logic_test
{
   test_func_t test_func;   /* function that executes the instruction */
   const char *name;        /* instruction name */
   vsx_log_op op;           /* which logical operation the instruction performs */
};
1072 
/* Descriptor for one VSX move test: function and instruction name. */
struct vsx_move_test
{
   test_func_t test_func;
   const char *name;
};
1078 
/* Descriptor for one VSX permute test, including its two input vectors. */
struct vsx_permute_test
{
   test_func_t test_func;   /* function that executes the instruction */
   const char *name;        /* instruction name (may include the immediate) */
   unsigned int xa[4];      /* four words of the XA input */
   unsigned int xb[4];      /* four words of the XB input */
};
1086 
/* Shared vector operands for the inline-asm wrappers below: vec_out
 * receives results, vec_inA and vec_inB supply the sources. */
static vector unsigned int vec_out, vec_inA, vec_inB;
1088 
test_lxsdx(void)1089 static void test_lxsdx(void)
1090 {
1091    __asm__ __volatile__ ("lxsdx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1092 }
1093 
1094 static void
test_lxvd2x(void)1095 test_lxvd2x(void)
1096 {
1097    __asm__ __volatile__ ("lxvd2x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1098 }
1099 
test_lxvdsx(void)1100 static void test_lxvdsx(void)
1101 {
1102    __asm__ __volatile__ ("lxvdsx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1103 }
1104 
test_lxvw4x(void)1105 static void test_lxvw4x(void)
1106 {
1107    __asm__ __volatile__ ("lxvw4x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1108 }
1109 
test_stxsdx(void)1110 static void test_stxsdx(void)
1111 {
1112    __asm__ __volatile__ ("stxsdx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1113 }
1114 
test_stxvd2x(void)1115 static void test_stxvd2x(void)
1116 {
1117    __asm__ __volatile__ ("stxvd2x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1118 }
1119 
test_stxvw4x(void)1120 static void test_stxvw4x(void)
1121 {
1122    __asm__ __volatile__ ("stxvw4x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1123 }
1124 
test_xxlxor(void)1125 static void test_xxlxor(void)
1126 {
1127    __asm__ __volatile__ ("xxlxor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1128 }
1129 
test_xxlor(void)1130 static void test_xxlor(void)
1131 {
1132    __asm__ __volatile__ ("xxlor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1133 }
1134 
test_xxlnor(void)1135 static void test_xxlnor(void)
1136 {
1137    __asm__ __volatile__ ("xxlnor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1138 }
1139 
test_xxland(void)1140 static void test_xxland(void)
1141 {
1142    __asm__ __volatile__ ("xxland          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1143 }
1144 
test_xxlandc(void)1145 static void test_xxlandc(void)
1146 {
1147    __asm__ __volatile__ ("xxlandc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1148 }
1149 
test_xxmrghw(void)1150 static void test_xxmrghw(void)
1151 {
1152    __asm__ __volatile__ ("xxmrghw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1153 }
1154 
test_xxmrglw(void)1155 static void test_xxmrglw(void)
1156 {
1157    __asm__ __volatile__ ("xxmrglw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1158 }
1159 
test_xxpermdi_00(void)1160 static void test_xxpermdi_00(void)
1161 {
1162    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1163 }
1164 
test_xxpermdi_01(void)1165 static void test_xxpermdi_01(void)
1166 {
1167    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1168 }
1169 
test_xxpermdi_10(void)1170 static void test_xxpermdi_10(void)
1171 {
1172    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1173 }
1174 
test_xxpermdi_11(void)1175 static void test_xxpermdi_11(void)
1176 {
1177    __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1178 }
1179 
test_xxsldwi_0(void)1180 static void test_xxsldwi_0(void)
1181 {
1182    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1183 }
1184 
test_xxsldwi_1(void)1185 static void test_xxsldwi_1(void)
1186 {
1187    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1188 }
1189 
test_xxsldwi_2(void)1190 static void test_xxsldwi_2(void)
1191 {
1192    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1193 }
1194 
test_xxsldwi_3(void)1195 static void test_xxsldwi_3(void)
1196 {
1197    __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1198 }
1199 
test_fcfids(void)1200 static void test_fcfids (void)
1201 {
1202     __asm__ __volatile__ ("fcfids          %0, %1" : "=f" (f17): "d" (f14));
1203 }
1204 
test_fcfidus(void)1205 static void test_fcfidus (void)
1206 {
1207     __asm__ __volatile__ ("fcfidus          %0, %1" : "=f" (f17): "d" (f14));
1208 }
1209 
test_fcfidu(void)1210 static void test_fcfidu (void)
1211 {
1212     __asm__ __volatile__ ("fcfidu          %0, %1" : "=f" (f17): "d" (f14));
1213 }
1214 
test_xsabsdp(void)1215 static void test_xsabsdp (void)
1216 {
1217    __asm__ __volatile__ ("xsabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1218 }
1219 
test_xscpsgndp(void)1220 static void test_xscpsgndp (void)
1221 {
1222    __asm__ __volatile__ ("xscpsgndp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1223 }
1224 
test_xsnabsdp(void)1225 static void test_xsnabsdp (void)
1226 {
1227    __asm__ __volatile__ ("xsnabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1228 }
1229 
test_xsnegdp(void)1230 static void test_xsnegdp (void)
1231 {
1232    __asm__ __volatile__ ("xsnegdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1233 }
1234 
1235 static int do_cmpudp;
test_xscmp(void)1236 static void test_xscmp (void)
1237 {
1238    if (do_cmpudp)
1239       __asm__ __volatile__ ("xscmpudp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1240    else
1241       __asm__ __volatile__ ("xscmpodp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1242 }
1243 
test_xsadddp(void)1244 static void test_xsadddp(void)
1245 {
1246    __asm__ __volatile__ ("xsadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1247 }
1248 
test_xsdivdp(void)1249 static void test_xsdivdp(void)
1250 {
1251    __asm__ __volatile__ ("xsdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1252 }
1253 
1254 static int do_adp;
test_xsmadd(void)1255 static void test_xsmadd(void)
1256 {
1257    if (do_adp)
1258       __asm__ __volatile__ ("xsmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1259    else
1260       __asm__ __volatile__ ("xsmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1261 }
1262 
test_xsmsub(void)1263 static void test_xsmsub(void)
1264 {
1265    if (do_adp)
1266       __asm__ __volatile__ ("xsmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1267    else
1268       __asm__ __volatile__ ("xsmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1269 }
1270 
test_xsnmadd(void)1271 static void test_xsnmadd(void)
1272 {
1273    if (do_adp)
1274       __asm__ __volatile__ ("xsnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1275    else
1276       __asm__ __volatile__ ("xsnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1277 }
1278 
test_xsmuldp(void)1279 static void test_xsmuldp(void)
1280 {
1281    __asm__ __volatile__ ("xsmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1282 }
1283 
test_xssubdp(void)1284 static void test_xssubdp(void)
1285 {
1286    __asm__ __volatile__ ("xssubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1287 }
1288 
test_xscvdpsxds(void)1289 static void test_xscvdpsxds (void)
1290 {
1291    __asm__ __volatile__ ("xscvdpsxds          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1292 }
1293 
test_xscvsxddp(void)1294 static void test_xscvsxddp (void)
1295 {
1296    __asm__ __volatile__ ("xscvsxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1297 }
1298 
test_xscvuxddp(void)1299 static void test_xscvuxddp (void)
1300 {
1301    __asm__ __volatile__ ("xscvuxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1302 }
1303 
/* 16-byte aligned, zero-initialised buffer of eight words; it is the
 * base address used by the store entries of ldst_tests. */
static unsigned int vstg[] __attribute__ ((aligned (16))) = {
   0, 0, 0, 0,
   0, 0, 0, 0
};

/* Element count of vstg, and the number of whole 4-word vectors it holds. */
#define NUM_VSTG_INTS (sizeof(vstg) / sizeof(vstg[0]))
#define NUM_VSTG_VECS (NUM_VSTG_INTS / 4)
1309 
/* 16-byte aligned table of eleven input words used as source data by the
 * load tests (ldst_tests, test_ldbrx, test_lfiwzx). */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x01234567, 0x89abcdef, 0x00112233, 0x44556677,
   0x8899aabb, 0x91929394, 0xa1a2a3a4, 0xb1b2b3b4,
   0xc1c2c3c4, 0xd1d2d3d4, 0x7a6b5d3e
};

/* Element count of viargs, and the number of whole 4-word vectors it
 * holds (integer division, so a partial trailing vector is not counted). */
#define NUM_VIARGS_INTS (sizeof(viargs) / sizeof(viargs[0]))
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS / 4)
1324 
1325 static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD },
1326                                      { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD },
1327                                      { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD },
1328                                      { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD },
1329                                      { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT },
1330                                      { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT },
1331                                      { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD },
1332                                      { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD },
1333                                      { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE },
1334                                      { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE },
1335                                      { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE },
1336                                      { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE },
1337                                      { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE },
1338                                      { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE },
1339                                      { NULL, NULL, NULL, 0, 0, 0 } };
1340 
1341 static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR },
1342                                       { &test_xxlor, "xxlor", VSX_OR } ,
1343                                       { &test_xxlnor, "xxlnor", VSX_NOR },
1344                                       { &test_xxland, "xxland", VSX_AND },
1345                                       { &test_xxlandc, "xxlandc", VSX_ANDC },
1346                                       { NULL, NULL, 0}};
1347 
1348 static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" },
1349                                     { &test_xscpsgndp, "xscpsgndp" },
1350                                     { &test_xsnabsdp, "xsnabsdp" },
1351                                     { &test_xsnegdp, "xsnegdp" },
1352                                     { NULL, NULL }
1353 
1354 };
1355 
1356 static permute_test_t permute_tests[] =
1357 {
1358   { &test_xxmrghw, "xxmrghw",
1359     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1360     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1361   },
1362   { &test_xxmrghw, "xxmrghw",
1363     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
1364     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
1365   },
1366   { &test_xxmrglw, "xxmrglw",
1367     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1368     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1369   },
1370   { &test_xxmrglw, "xxmrglw",
1371     { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
1372     { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
1373   },
1374   { &test_xxpermdi_00, "xxpermdi DM=00",
1375     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1376     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1377   },
1378   { &test_xxpermdi_01, "xxpermdi DM=01",
1379     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1380     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1381   },
1382   { &test_xxpermdi_10, "xxpermdi DM=10",
1383     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1384     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1385   },
1386   { &test_xxpermdi_11, "xxpermdi DM=11",
1387     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1388     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1389   },
1390   { &test_xxsldwi_0, "xxsldwi SHW=0",
1391     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1392     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1393   },
1394   { &test_xxsldwi_1, "xxsldwi SHW=1",
1395     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1396     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1397   },
1398   { &test_xxsldwi_2, "xxsldwi SHW=2",
1399     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1400     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1401   },
1402   { &test_xxsldwi_3, "xxsldwi SHW=3",
1403     { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1404     { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1405   },
1406   { NULL, NULL }
1407 };
1408 
1409 static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 },
1410                                 { &test_fcfidus, "fcfidus", 1 },
1411                                 { &test_fcfidu, "fcfidu", 1 },
1412                                 { NULL, NULL, 0 },
1413 
1414 };
1415 
1416 static vx_fp_test_t vx_fp_tests[] = {
1417                                      { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP},
1418                                      { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER},
1419                                      { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER},
1420                                      { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA},
1421                                      { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS},
1422                                      { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA},
1423                                      { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER},
1424                                      { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER},
1425                                      { NULL, NULL, NULL, 0, 0 }
1426 };
1427 
1428 static xs_conv_test_t xs_conv_tests[] = {
1429                                          { &test_xscvdpsxds, "xscvdpsxds", 15},
1430                                          { &test_xscvsxddp, "xscvsxddp", 15},
1431                                          { &test_xscvuxddp, "xscvuxddp", 15},
1432                                          { NULL, NULL, 0}
1433 };
1434 
1435 #ifdef __powerpc64__
test_ldbrx(void)1436 static void test_ldbrx(void)
1437 {
1438    int i;
1439    HWord_t reg_out;
1440    unsigned char * byteIn, * byteOut;
1441    r14 = (HWord_t)viargs;
1442    // Just try the instruction an arbitrary number of times at different r15 offsets.
1443    for (i = 0; i < 3; i++) {
1444       int j, k;
1445       reg_out = 0;
1446       r15 = i * 4;
1447       __asm__ __volatile__ ("ldbrx          %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15));
1448       byteIn = ((unsigned char *)(r14 + r15));
1449       byteOut = (unsigned char *)&reg_out;
1450 
1451       printf("ldbrx:");
1452       for (k = 0; k < 8; k++) {
1453          printf( " %02x", (byteIn[k]));
1454       }
1455       printf(" (reverse) =>");
1456       for (j = 0; j < 8; j++) {
1457          printf( " %02x", (byteOut[j]));
1458       }
1459       printf("\n");
1460    }
1461    printf( "\n" );
1462 }
1463 
1464 static void
test_popcntd(void)1465 test_popcntd(void)
1466 {
1467    uint64_t res;
1468    unsigned long long src = 0x9182736405504536ULL;
1469    r14 = src;
1470    __asm__ __volatile__ ("popcntd          %0, %1" : "=r" (res): "r" (r14));
1471    printf("popcntd: 0x%llx => %d\n", src, (int)res);
1472    printf( "\n" );
1473 }
1474 #endif
1475 
1476 static void
test_lfiwzx(void)1477 test_lfiwzx(void)
1478 {
1479    unsigned int i;
1480    unsigned int * src;
1481    uint64_t reg_out;
1482    r14 = (HWord_t)viargs;
1483    // Just try the instruction an arbitrary number of times at different r15 offsets.
1484    for (i = 0; i < 3; i++) {
1485       reg_out = 0;
1486       r15 = i * 4;
1487       __asm__ __volatile__ ("lfiwzx          %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15));
1488       src = ((unsigned int *)(r14 + r15));
1489       printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out);
1490 
1491    }
1492    printf( "\n" );
1493 }
1494 
test_vx_fp_ops(void)1495 static void test_vx_fp_ops(void)
1496 {
1497 
1498    test_func_t func;
1499    int k;
1500    char * test_name = (char *)malloc(20);
1501    k = 0;
1502 
1503    build_special_fargs_table();
1504    while ((func = vx_fp_tests[k].test_func)) {
1505       int i, condreg, repeat = 0;
1506       unsigned int flags;
1507       unsigned long long * frap, * frbp, * dst;
1508       vx_fp_test_t test_group = vx_fp_tests[k];
1509       vx_fp_test_type test_type = test_group.test_type;
1510 
1511       switch (test_type) {
1512          case VX_FP_CMP:
1513             strcpy(test_name, "xscmp");
1514             if (!repeat) {
1515                repeat = 1;
1516                strcat(test_name, "udp");
1517                do_cmpudp = 1;
1518             }
1519             break;
1520          case VX_FP_SMA:
1521          case VX_FP_SMS:
1522          case VX_FP_SNMA:
1523             if (test_type == VX_FP_SMA)
1524                strcpy(test_name, "xsmadd");
1525             else if (test_type == VX_FP_SMS)
1526                strcpy(test_name, "xsmsub");
1527             else
1528                strcpy(test_name, "xsnmadd");
1529             if (!repeat) {
1530                repeat = 1;
1531                strcat(test_name, "adp");
1532                do_adp = 1;
1533             }
1534             break;
1535          case VX_FP_OTHER:
1536             strcpy(test_name, test_group.name);
1537             break;
1538          default:
1539             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
1540             exit(1);
1541       }
1542 
1543 again:
1544       for (i = 0; i < test_group.num_tests; i++) {
1545          unsigned int * inA, * inB, * pv;
1546          double * dpA = (double *)&vec_inA;
1547          double * dpB = (double *)&vec_inB;
1548          double * dpT = (double *)&vec_out;
1549 
1550          fp_test_args_t aTest = test_group.targs[i];
1551          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
1552          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
1553          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1554          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1555          // Only need to copy one doubleword into each vector's element 0
1556          if (isLE) {
1557             // With LE, vector element 0 is the second doubleword from the left
1558             memset(dpA, 0, 8);
1559             memset(dpB, 0, 8);
1560             dpA++;
1561             dpB++;
1562          }
1563          memcpy(dpA, inA, 8);
1564          memcpy(dpB, inB, 8);
1565 
1566          switch (test_type) {
1567             case VX_FP_CMP:
1568                SET_FPSCR_ZERO;
1569                SET_CR_XER_ZERO;
1570                (*func)();
1571                GET_CR(flags);
1572                condreg = (flags & 0x0f000000) >> 24;
1573                printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg);
1574               // printf("\tFRA: %e;  FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
1575                if ( condreg != aTest.cr_flags) {
1576                   printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg);
1577                }
1578                break;
1579             case VX_FP_SMA:
1580             case VX_FP_SMS:
1581             case VX_FP_SNMA:
1582             case VX_FP_OTHER:
1583             {
1584                int idx;
1585                unsigned long long vsr_XT;
1586                pv = (unsigned int *)&vec_out;
1587                // clear vec_out
1588                for (idx = 0; idx < 4; idx++, pv++)
1589                   *pv = 0;
1590 
1591                if (test_type != VX_FP_OTHER) {
1592                   /* Then we need a third src argument, which is stored in element 0 of
1593                    * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
1594                    * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
1595                    * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
1596                    * data (input args) contain only two inputs, so I arbitrarily
1597                    * use spec_fargs elements 4 and 14 (alternating) for the third source
1598                    * argument.  We can use the same input data for a given pair of
1599                    * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
1600                    * the expected result should be the same.
1601                    */
1602                   int extra_arg_idx;
1603                   if (i % 2)
1604                      extra_arg_idx = 4;
1605                   else
1606                      extra_arg_idx = 14;
1607 
1608                   if (repeat) {
1609                      /* We're on the first time through of one of the VX_FP_SMx
1610                       * test types, meaning we're testing a xs<ZZZ>adp case, thus we
1611                       * have to swap inputs as described above:
1612                       *    src2 <= VSX[XT]
1613                       *    src3 <= VSX[XB]
1614                       */
1615                      if (isLE)
1616                         dpT++;
1617                      memcpy(dpT, inB, 8);  // src2
1618                      memcpy(dpB, &spec_fargs[extra_arg_idx], 8);  //src3
1619                      frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
1620                   } else {
1621                      // Don't need to init src2, as it's done before the switch()
1622                      if (isLE)
1623                         dpT++;
1624                      memcpy(dpT, &spec_fargs[extra_arg_idx], 8);  //src3
1625                   }
1626                   memcpy(&vsr_XT, dpT, 8);
1627                }
1628 
1629                (*func)();
1630                dst = (unsigned long long *) &vec_out;
1631                if (isLE)
1632                   dst++;
1633                if (test_type == VX_FP_OTHER)
1634                   printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst);
1635                else
1636                   printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1637                           test_name, vsr_XT, *frap, *frbp, *dst );
1638 
1639                /*
1640               {
1641                   // Debug code.  Keep this block commented out except when debugging.
1642                   double result, expected;
1643                   memcpy(&result, dst, 8);
1644                   memcpy(&expected, &aTest.dp_bin_result, 8);
1645                   printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1646                           spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1647                           expected, result );
1648                }
1649               */
1650                break;
1651             }
1652          }
1653 
1654 
1655       }
1656       printf( "\n" );
1657 
1658       if (repeat) {
1659          repeat = 0;
1660          switch (test_type) {
1661             case VX_FP_CMP:
1662                strcpy(test_name, "xscmp");
1663                strcat(test_name, "odp");
1664                do_cmpudp = 0;
1665                break;
1666             case VX_FP_SMA:
1667             case VX_FP_SMS:
1668             case VX_FP_SNMA:
1669                if (test_type == VX_FP_SMA)
1670                   strcpy(test_name, "xsmadd");
1671                else if (test_type == VX_FP_SMS)
1672                   strcpy(test_name, "xsmsub");
1673                else
1674                   strcpy(test_name, "xsnmadd");
1675                strcat(test_name, "mdp");
1676                do_adp = 0;
1677                break;
1678             case VX_FP_OTHER:
1679                break;
1680          }
1681          goto again;
1682       }
1683       k++;
1684    }
1685    printf( "\n" );
1686    free(test_name);
1687 }
1688 
test_xs_conv_ops(void)1689 static void test_xs_conv_ops(void)
1690 {
1691 
1692    test_func_t func;
1693    int k = 0;
1694    double * dpB = (double *)&vec_inB;
1695    if (isLE) {
1696       memset(dpB, 0, 8);
1697       dpB++;
1698    }
1699 
1700    build_special_fargs_table();
1701    while ((func = xs_conv_tests[k].test_func)) {
1702       int i;
1703       unsigned long long * frbp, * dst;
1704       xs_conv_test_t test_group = xs_conv_tests[k];
1705       for (i = 0; i < test_group.num_tests; i++) {
1706          unsigned int * inB, * pv;
1707          int idx;
1708          inB = (unsigned int *)&spec_fargs[i];
1709          frbp = (unsigned long long *)&spec_fargs[i];
1710 
1711          memcpy(dpB, inB, 8);
1712          pv = (unsigned int *)&vec_out;
1713          // clear vec_out
1714          for (idx = 0; idx < 4; idx++, pv++)
1715             *pv = 0;
1716          (*func)();
1717          dst = (unsigned long long *) &vec_out;
1718          if (isLE)
1719             dst++;
1720          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst);
1721 
1722       }
1723       k++;
1724       printf("\n");
1725    }
1726    printf( "\n" );
1727 }
1728 
/*
 * Run one VSX load test and print source words next to the loaded words.
 *
 * loadTest.base_addr       start of the source data; NUM_VIARGS_VECS
 *                          consecutive 16-byte vectors are visited.
 * loadTest.offset          if non-zero, every vector is tested a second time
 *                          with this byte offset placed in r15.
 * loadTest.num_words_to_process
 *                          number of 32-bit words printed on each side.
 * loadTest.type            VSX_LOAD_SPLAT makes the source side print words
 *                          0 and 1 alternately.
 *
 * r14 holds the address of the current vector and r15 the byte offset when
 * the test function is called; the result is read back from vec_out
 * (presumably the test instruction loads from r14 + r15 -- confirm against
 * the individual test functions).
 */
static void do_load_test(ldst_test_t loadTest)
{
   test_func_t func;
   unsigned int *src, *dst;
   int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0;
   int i, j, m, k;

   func = loadTest.test_func;
   for (i = 0; i < NUM_VIARGS_VECS; i++) {
      int again;
      j = 0;
      // Address of the i-th 16-byte vector.  Computed from the base each time
      // rather than accumulated, so the stride stays 16 bytes for every i
      // (the two forms agree for i = 0 and i = 1).
      r14 = (HWord_t) loadTest.base_addr + (HWord_t) i * 16;
      do {
         unsigned int * pv = (unsigned int *)&vec_out;
         int idx;
         // clear all four words of vec_out (index-based so no word is
         // skipped and nothing past the vector is written)
         for (idx = 0; idx < 4; idx++)
            pv[idx] = 0;

         again = 0;
         r15 = j;

         // execute test insn
         (*func)();

         src = (unsigned int*) (((unsigned char *)r14) + j);
         dst = (unsigned int*) &vec_out;

         printf( "%s:", loadTest.name);
         for (m = 0; m < loadTest.num_words_to_process; m++) {
            printf( " %08x", src[splat ? m % 2 : m]);
         }
         printf( " =>");
         m = 0;
         k = loadTest.num_words_to_process;
         if (isLE) {
            // For a two-word result on LE, print words 2 and 3 of vec_out
            // instead of words 0 and 1.
            if (loadTest.num_words_to_process == 2) {
               m = 2;
               k += 2;
            }
         }

         for (; m < k; m++) {
            printf( " %08x", dst[m]);
         }
         printf("\n");
         // Second pass for this vector, using the test's byte offset.
         if (j == 0 && loadTest.offset) {
            again = 1;
            j += loadTest.offset;
         }
      }
      while (again);
   }
}
1784 
1785 static void
do_store_test(ldst_test_t storeTest)1786 do_store_test ( ldst_test_t storeTest )
1787 {
1788    test_func_t func;
1789    unsigned int *src, *dst;
1790    int m;
1791 
1792    func = storeTest.test_func;
1793    r14 = (HWord_t) storeTest.base_addr;
1794    r15 = (HWord_t) storeTest.offset;
1795    unsigned int * pv = (unsigned int *) storeTest.base_addr;
1796    int idx;
1797    // clear out storage destination
1798    for (idx = 0; idx < 4; idx++, pv += idx)
1799       *pv = 0;
1800 
1801    memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char));
1802 
1803    // execute test insn
1804    (*func)();
1805    src = &viargs[0];
1806    dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset);
1807 
1808    printf( "%s:", storeTest.name );
1809    for (m = 0; m < storeTest.num_words_to_process; m++) {
1810       printf( " %08x", src[m] );
1811    }
1812    printf( " =>" );
1813    for (m = 0; m < storeTest.num_words_to_process; m++) {
1814       printf( " %08x", dst[m] );
1815    }
1816    printf( "\n" );
1817 }
1818 
1819 
test_ldst(void)1820 static void test_ldst(void)
1821 {
1822    int k = 0;
1823 
1824    while (ldst_tests[k].test_func) {
1825       if (ldst_tests[k].type == VSX_STORE)
1826          do_store_test(ldst_tests[k]);
1827       else
1828          do_load_test(ldst_tests[k]);
1829       k++;
1830       printf("\n");
1831    }
1832 }
1833 
test_ftdiv(void)1834 static void test_ftdiv(void)
1835 {
1836    int i, num_tests, crx;
1837    unsigned int flags;
1838    unsigned long long * frap, * frbp;
1839    build_special_fargs_table();
1840 
1841    num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0];
1842 
1843    for (i = 0; i < num_tests; i++) {
1844       fp_test_args_t aTest = ftdiv_tests[i];
1845       f14 = spec_fargs[aTest.fra_idx];
1846       f15 = spec_fargs[aTest.frb_idx];
1847       frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1848       frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1849       SET_FPSCR_ZERO;
1850       SET_CR_XER_ZERO;
1851       __asm__ __volatile__ ("ftdiv           cr1, %0, %1" : : "d" (f14), "d" (f15));
1852       GET_CR(flags);
1853       crx = (flags & 0x0f000000) >> 24;
1854       printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx);
1855 //      printf("\tFRA: %e;  FRB: %e\n", f14, f15);
1856       if ( crx != aTest.cr_flags) {
1857          printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx);
1858       }
1859    }
1860    printf( "\n" );
1861 }
1862 
1863 
test_p7_fpops(void)1864 static void test_p7_fpops ( void )
1865 {
1866    int k = 0;
1867    test_func_t func;
1868 
1869    build_fargs_table();
1870    while ((func = fp_tests[k].test_func)) {
1871       float res;
1872       double resd;
1873       unsigned long long u0;
1874       int i;
1875       int res32 = strcmp(fp_tests[k].name, "fcfidu");
1876 
1877       for (i = 0; i < nb_fargs; i++) {
1878          u0 = *(unsigned long long *) (&fargs[i]);
1879          f14 = fargs[i];
1880          (*func)();
1881          if (res32) {
1882             res = f17;
1883             printf( "%s %016llx => (raw sp) %08x)",
1884                     fp_tests[k].name, u0, *((unsigned int *)&res));
1885          } else {
1886             resd = f17;
1887             printf( "%s %016llx => (raw sp) %016llx)",
1888                     fp_tests[k].name, u0, *(unsigned long long *)(&resd));
1889          }
1890          printf( "\n" );
1891       }
1892 
1893       k++;
1894       printf( "\n" );
1895    }
1896 }
1897 
test_vsx_logic(void)1898 static void test_vsx_logic(void)
1899 {
1900    logic_test_t aTest;
1901    test_func_t func;
1902    int k;
1903    k = 0;
1904 
1905    while ((func = logic_tests[k].test_func)) {
1906       unsigned int * pv;
1907       int startA, startB;
1908       unsigned int * inA, * inB, * dst;
1909       int idx, i;
1910       startA = 0;
1911       aTest = logic_tests[k];
1912       for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) {
1913          startB = startA + 4;
1914          pv = (unsigned int *)&vec_out;
1915          inA = &viargs[startA];
1916          inB = &viargs[startB];
1917          memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1918          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1919          // clear vec_out
1920          for (idx = 0; idx < 4; idx++, pv++)
1921             *pv = 0;
1922 
1923          // execute test insn
1924          (*func)();
1925          dst = (unsigned int*) &vec_out;
1926 
1927          printf( "%s:", aTest.name);
1928          printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name);
1929          printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]);
1930          printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1931 
1932       }
1933       k++;
1934    }
1935    printf( "\n" );
1936 }
1937 
1938 static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) =
1939 {
1940  { 0x0123456789abcdefULL, 0x0011223344556677ULL},
1941  { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL},
1942  { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL}
1943 };
1944 #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0])
1945 
test_move_ops(void)1946 static void test_move_ops (void)
1947 {
1948    move_test_t aTest;
1949    test_func_t func;
1950    int k;
1951    k = 0;
1952 
1953    while ((func = move_tests[k].test_func)) {
1954       unsigned int * pv;
1955       int startA, startB;
1956       unsigned long long * inA, * inB, * dst;
1957       int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0);
1958       int idx;
1959       inA = NULL;
1960       aTest = move_tests[k];
1961       for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) {
1962          inB = (unsigned long long *)&vec_args[startB];
1963          memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1964          if (isLE)
1965             inB++;
1966          startA = 0;
1967 repeat:
1968          if (use_vecA) {
1969             inA = (unsigned long long *)&vec_args[startA];
1970             memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1971             startA++;
1972          }
1973          pv = (unsigned int *)&vec_out;
1974          // clear vec_out
1975          for (idx = 0; idx < 4; idx++, pv++)
1976             *pv = 0;
1977 
1978          // execute test insn
1979          (*func)();
1980          dst = (unsigned long long *) &vec_out;
1981          if (isLE) {
1982             dst++;
1983             inA++;
1984          }
1985 
1986          printf( "%s:", aTest.name);
1987          if (use_vecA)
1988             printf( " X[A]: %016llx ", *inA);
1989          printf( " X[B]: %016llx", *inB);
1990          printf(" => %016llx\n", *dst);
1991 
1992          if (use_vecA && startA < NUM_VEC_ARGS_LONGS)
1993             goto repeat;
1994       }
1995       k++;
1996       printf( "\n" );
1997    }
1998 }
1999 
test_permute_ops(void)2000 static void test_permute_ops (void)
2001 {
2002   permute_test_t *aTest;
2003   unsigned int *dst = (unsigned int *) &vec_out;
2004 
2005   for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++)
2006     {
2007       /* Grab test input and clear output vector.  */
2008       memcpy(&vec_inA, aTest->xa, sizeof(vec_inA));
2009       memcpy(&vec_inB, aTest->xb, sizeof(vec_inB));
2010       memset(dst, 0, sizeof(vec_out));
2011 
2012       /* execute test insn */
2013       aTest->test_func();
2014 
2015       printf( "%s:\n", aTest->name);
2016       printf( "        XA[%08x,%08x,%08x,%08x]\n",
2017               aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]);
2018       printf( "        XB[%08x,%08x,%08x,%08x]\n",
2019               aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]);
2020       printf( "   =>   XT[%08x,%08x,%08x,%08x]\n",
2021               dst[0], dst[1], dst[2], dst[3]);
2022 
2023     }
2024   printf( "\n" );
2025 }
2026 
2027 static test_table_t all_tests[] = { { &test_ldst,
2028                                        "Test VSX load/store instructions" },
2029                                      { &test_vsx_logic,
2030                                        "Test VSX logic instructions" },
2031 #ifdef __powerpc64__
2032                                      { &test_ldbrx,
2033                                        "Test ldbrx instruction" },
2034                                      { &test_popcntd,
2035                                        "Test popcntd instruction" },
2036 #endif
2037                                      { &test_lfiwzx,
2038                                        "Test lfiwzx instruction" },
2039                                      { &test_p7_fpops,
2040                                        "Test P7 floating point convert instructions"},
2041                                      { &test_ftdiv,
2042                                        "Test ftdiv instruction" },
2043                                      { &test_move_ops,
2044                                        "Test VSX move instructions"},
2045                                      { &test_permute_ops,
2046                                        "Test VSX permute instructions"},
2047                                      { &test_vx_fp_ops,
2048                                        "Test VSX floating point instructions"},
2049                                      { &test_xs_conv_ops,
2050                                        "Test VSX scalar integer conversion instructions" },
2051                                      { NULL, NULL }
2052 };
2053 #endif // HAS_VSX
2054 
/*
 * Program entry point.  When built with HAS_VSX, print the heading of each
 * category in all_tests and run it, stopping at the NULL terminator.
 * Without HAS_VSX nothing is run.  Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   int idx;

   for (idx = 0; all_tests[idx].test_category; idx++) {
      test_table_t entry = all_tests[idx];

      printf( "%s\n", entry.name );
      entry.test_category();
   }

#endif // HAS_VSX

   return 0;
}
2074