1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5 #include <malloc.h>
6
typedef unsigned char UChar;
typedef unsigned int UInt;
typedef unsigned long int UWord;
typedef unsigned long long int ULong;

/* Scratch bytes that the VGATHER* tests index into via %rip-relative
   addressing; __attribute__((used)) keeps the linker/compiler from
   discarding it even though it is only referenced from inline asm. */
UChar randArray[1027] __attribute__((used));

/* True iff _ptr is 32-byte aligned, as required by vmovdqa. */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* One 256-bit (ymm) register's worth of data, viewable as bytes or
   32-bit words. */
typedef union { UChar u8[32]; UInt u32[8]; } YMM;

/* Workspace handed to every generated test: four ymm-sized slots at
   byte offsets 0/32/64/96, plus a 64-bit slot at byte offset 128. */
typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;
19
/* Print the 32 bytes of *vec in hex, most significant byte first,
   inserting a '.' after each group of eight bytes (but not at the
   very end).  The vector must be 32-byte aligned. */
void showYMM ( YMM* vec )
{
   int ix;
   assert(IS_32_ALIGNED(vec));
   for (ix = 31; ix >= 0; ix--) {
      printf("%02x", (UInt)vec->u8[ix]);
      int group_boundary = ((ix & 7) == 0) && (ix != 0);
      if (group_boundary)
         printf(".");
   }
}
29
/* Dump an entire Block: the caller-supplied tag line, the four
   YMM slots (one per line, via showYMM) and the trailing 64-bit
   value. */
void showBlock ( char* msg, Block* block )
{
   YMM* rows[4] = { &block->a1, &block->a2, &block->a3, &block->a4 };
   int r;
   printf(" %s\n", msg);
   for (r = 0; r < 4; r++) {
      printf(" ");
      showYMM(rows[r]);
      printf("\n");
   }
   printf(" %016llx\n", block->u64);
}
39
randUChar(void)40 UChar randUChar ( void )
41 {
42 static UInt seed = 80021;
43 seed = 1103515245 * seed + 12345;
44 return (seed >> 17) & 0xFF;
45 }
46
/* Fill every byte of *b with pseudo-random data from randUChar().
   Uses size_t for the index: sizeof(Block) is unsigned, and a
   signed 'int i' would trigger a signed/unsigned comparison
   (-Wsign-compare) in the loop condition. */
void randBlock ( Block* b )
{
   size_t i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}
54
55
56 /* Generate a function test_NAME, that tests the given insn, in both
57 its mem and reg forms. The reg form of the insn may mention, as
58 operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of
59 the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
60 and %r14. It's OK for the insn to clobber ymm0, as this is needed
61 for testing PCMPxSTRx, and ymm6, as this is needed for testing
62 MOVMASK variants. */
63
/* Emit '__attribute__((noinline)) static void test_NAME(void)' which:
     1. allocates a 32-byte-aligned Block and fills it with
        pseudo-random bytes;
     2. reg form: loads ymm7/ymm8/ymm6/ymm9 and r14 from the Block,
        runs _reg_form, stores those registers back, printing the
        Block before and after;
     3. mem form: re-randomises the Block, points rax at it (ymm6 is
        deliberately NOT preloaded here), runs _mem_form, and stores
        ymm8/ymm7/ymm9/r14 back, again printing before/after.
   The before/after dumps are compared against a reference run to
   check an emulator's implementation of the instruction.  Note the
   clobber lists name xmm registers (the asm touches the full ymm
   extents) and include "memory" because the asm reads/writes *b. */
#define GEN_test_RandM(_name, _reg_form, _mem_form) \
   \
   __attribute__ ((noinline)) static void test_##_name ( void ) \
   { \
      Block* b = memalign(32, sizeof(Block)); \
      randBlock(b); \
      printf("%s(reg)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm6" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _reg_form "\n\t" \
         "vmovdqa %%ymm7, 0(%0)" "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b); \
      randBlock(b); \
      printf("%s(mem)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "leaq 0(%0),%%rax" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm7" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _mem_form "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm7, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm6", \
           "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
      showBlock("after", b); \
      printf("\n"); \
      free(b); \
   }

/* Reg-only / mem-only variants: the unused form is the empty string,
   which assembles to nothing inside the template above. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)
117
/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2. */
119
120 GEN_test_RandM(VPOR_256,
121 "vpor %%ymm6, %%ymm8, %%ymm7",
122 "vpor (%%rax), %%ymm8, %%ymm7")
123
124 GEN_test_RandM(VPXOR_256,
125 "vpxor %%ymm6, %%ymm8, %%ymm7",
126 "vpxor (%%rax), %%ymm8, %%ymm7")
127
128 GEN_test_RandM(VPSUBB_256,
129 "vpsubb %%ymm6, %%ymm8, %%ymm7",
130 "vpsubb (%%rax), %%ymm8, %%ymm7")
131
132 GEN_test_RandM(VPSUBD_256,
133 "vpsubd %%ymm6, %%ymm8, %%ymm7",
134 "vpsubd (%%rax), %%ymm8, %%ymm7")
135
136 GEN_test_RandM(VPADDD_256,
137 "vpaddd %%ymm6, %%ymm8, %%ymm7",
138 "vpaddd (%%rax), %%ymm8, %%ymm7")
139
140 GEN_test_RandM(VPMOVZXWD_256,
141 "vpmovzxwd %%xmm6, %%ymm8",
142 "vpmovzxwd (%%rax), %%ymm8")
143
144 GEN_test_RandM(VPMOVZXBW_256,
145 "vpmovzxbw %%xmm6, %%ymm8",
146 "vpmovzxbw (%%rax), %%ymm8")
147
148 GEN_test_RandM(VPBLENDVB_256,
149 "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
150 "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")
151
152 GEN_test_RandM(VPMINSD_256,
153 "vpminsd %%ymm6, %%ymm8, %%ymm7",
154 "vpminsd (%%rax), %%ymm8, %%ymm7")
155
156 GEN_test_RandM(VPMAXSD_256,
157 "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
158 "vpmaxsd (%%rax), %%ymm8, %%ymm7")
159
160 GEN_test_RandM(VPSHUFB_256,
161 "vpshufb %%ymm6, %%ymm8, %%ymm7",
162 "vpshufb (%%rax), %%ymm8, %%ymm7")
163
164 GEN_test_RandM(VPUNPCKLBW_256,
165 "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
166 "vpunpcklbw (%%rax), %%ymm8, %%ymm7")
167
168 GEN_test_RandM(VPUNPCKHBW_256,
169 "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
170 "vpunpckhbw (%%rax), %%ymm8, %%ymm7")
171
172 GEN_test_RandM(VPABSD_256,
173 "vpabsd %%ymm6, %%ymm8",
174 "vpabsd (%%rax), %%ymm8")
175
176 GEN_test_RandM(VPACKUSWB_256,
177 "vpackuswb %%ymm9, %%ymm8, %%ymm7",
178 "vpackuswb (%%rax), %%ymm8, %%ymm7")
179
180 GEN_test_Ronly(VPMOVMSKB_256,
181 "vpmovmskb %%ymm8, %%r14")
182
183 GEN_test_RandM(VPAND_256,
184 "vpand %%ymm9, %%ymm8, %%ymm7",
185 "vpand (%%rax), %%ymm8, %%ymm7")
186
187 GEN_test_RandM(VPCMPEQB_256,
188 "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
189 "vpcmpeqb (%%rax), %%ymm8, %%ymm7")
190
191 GEN_test_RandM(VPSHUFLW_0x39_256,
192 "vpshuflw $0x39, %%ymm9, %%ymm7",
193 "vpshuflw $0xC6, (%%rax), %%ymm8")
194
195 GEN_test_RandM(VPSHUFHW_0x39_256,
196 "vpshufhw $0x39, %%ymm9, %%ymm7",
197 "vpshufhw $0xC6, (%%rax), %%ymm8")
198
199 GEN_test_RandM(VPMULLW_256,
200 "vpmullw %%ymm9, %%ymm8, %%ymm7",
201 "vpmullw (%%rax), %%ymm8, %%ymm7")
202
203 GEN_test_RandM(VPADDUSW_256,
204 "vpaddusw %%ymm9, %%ymm8, %%ymm7",
205 "vpaddusw (%%rax), %%ymm8, %%ymm7")
206
207 GEN_test_RandM(VPMULHUW_256,
208 "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
209 "vpmulhuw (%%rax), %%ymm8, %%ymm7")
210
211 GEN_test_RandM(VPADDUSB_256,
212 "vpaddusb %%ymm9, %%ymm8, %%ymm7",
213 "vpaddusb (%%rax), %%ymm8, %%ymm7")
214
215 GEN_test_RandM(VPUNPCKLWD_256,
216 "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
217 "vpunpcklwd (%%rax), %%ymm8, %%ymm7")
218
219 GEN_test_RandM(VPUNPCKHWD_256,
220 "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
221 "vpunpckhwd (%%rax), %%ymm8, %%ymm7")
222
223 GEN_test_Ronly(VPSLLD_0x05_256,
224 "vpslld $0x5, %%ymm9, %%ymm7")
225
226 GEN_test_Ronly(VPSRLD_0x05_256,
227 "vpsrld $0x5, %%ymm9, %%ymm7")
228
229 GEN_test_Ronly(VPSRAD_0x05_256,
230 "vpsrad $0x5, %%ymm9, %%ymm7")
231
232 GEN_test_RandM(VPSUBUSB_256,
233 "vpsubusb %%ymm9, %%ymm8, %%ymm7",
234 "vpsubusb (%%rax), %%ymm8, %%ymm7")
235
236 GEN_test_RandM(VPSUBSB_256,
237 "vpsubsb %%ymm9, %%ymm8, %%ymm7",
238 "vpsubsb (%%rax), %%ymm8, %%ymm7")
239
240 GEN_test_Ronly(VPSRLDQ_0x05_256,
241 "vpsrldq $0x5, %%ymm9, %%ymm7")
242
243 GEN_test_Ronly(VPSLLDQ_0x05_256,
244 "vpslldq $0x5, %%ymm9, %%ymm7")
245
246 GEN_test_RandM(VPANDN_256,
247 "vpandn %%ymm9, %%ymm8, %%ymm7",
248 "vpandn (%%rax), %%ymm8, %%ymm7")
249
250 GEN_test_RandM(VPUNPCKLQDQ_256,
251 "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
252 "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")
253
254 GEN_test_Ronly(VPSRLW_0x05_256,
255 "vpsrlw $0x5, %%ymm9, %%ymm7")
256
257 GEN_test_Ronly(VPSLLW_0x05_256,
258 "vpsllw $0x5, %%ymm9, %%ymm7")
259
260 GEN_test_RandM(VPADDW_256,
261 "vpaddw %%ymm6, %%ymm8, %%ymm7",
262 "vpaddw (%%rax), %%ymm8, %%ymm7")
263
264 GEN_test_RandM(VPACKSSDW_256,
265 "vpackssdw %%ymm9, %%ymm8, %%ymm7",
266 "vpackssdw (%%rax), %%ymm8, %%ymm7")
267
268 GEN_test_RandM(VPUNPCKLDQ_256,
269 "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
270 "vpunpckldq (%%rax), %%ymm8, %%ymm7")
271
272 GEN_test_RandM(VPCMPEQD_256,
273 "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
274 "vpcmpeqd (%%rax), %%ymm8, %%ymm7")
275
276 GEN_test_RandM(VPSHUFD_0x39_256,
277 "vpshufd $0x39, %%ymm9, %%ymm8",
278 "vpshufd $0xC6, (%%rax), %%ymm7")
279
280 GEN_test_RandM(VPADDQ_256,
281 "vpaddq %%ymm6, %%ymm8, %%ymm7",
282 "vpaddq (%%rax), %%ymm8, %%ymm7")
283
284 GEN_test_RandM(VPSUBQ_256,
285 "vpsubq %%ymm6, %%ymm8, %%ymm7",
286 "vpsubq (%%rax), %%ymm8, %%ymm7")
287
288 GEN_test_RandM(VPSUBW_256,
289 "vpsubw %%ymm6, %%ymm8, %%ymm7",
290 "vpsubw (%%rax), %%ymm8, %%ymm7")
291
292 GEN_test_RandM(VPCMPEQQ_256,
293 "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
294 "vpcmpeqq (%%rax), %%ymm8, %%ymm7")
295
296 GEN_test_RandM(VPCMPGTQ_256,
297 "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
298 "vpcmpgtq (%%rax), %%ymm8, %%ymm7")
299
300 GEN_test_Ronly(VPSRLQ_0x05_256,
301 "vpsrlq $0x5, %%ymm9, %%ymm7")
302
303 GEN_test_RandM(VPMULUDQ_256,
304 "vpmuludq %%ymm6, %%ymm8, %%ymm7",
305 "vpmuludq (%%rax), %%ymm8, %%ymm7")
306
307 GEN_test_RandM(VPMULDQ_256,
308 "vpmuldq %%ymm6, %%ymm8, %%ymm7",
309 "vpmuldq (%%rax), %%ymm8, %%ymm7")
310
311 GEN_test_Ronly(VPSLLQ_0x05_256,
312 "vpsllq $0x5, %%ymm9, %%ymm7")
313
314 GEN_test_RandM(VPMAXUD_256,
315 "vpmaxud %%ymm6, %%ymm8, %%ymm7",
316 "vpmaxud (%%rax), %%ymm8, %%ymm7")
317
318 GEN_test_RandM(VPMINUD_256,
319 "vpminud %%ymm6, %%ymm8, %%ymm7",
320 "vpminud (%%rax), %%ymm8, %%ymm7")
321
322 GEN_test_RandM(VPMULLD_256,
323 "vpmulld %%ymm6, %%ymm8, %%ymm7",
324 "vpmulld (%%rax), %%ymm8, %%ymm7")
325
326 GEN_test_RandM(VPMAXUW_256,
327 "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
328 "vpmaxuw (%%rax), %%ymm8, %%ymm7")
329
330 GEN_test_RandM(VPMINUW_256,
331 "vpminuw %%ymm6, %%ymm8, %%ymm7",
332 "vpminuw (%%rax), %%ymm8, %%ymm7")
333
334 GEN_test_RandM(VPMAXSW_256,
335 "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
336 "vpmaxsw (%%rax), %%ymm8, %%ymm7")
337
338 GEN_test_RandM(VPMINSW_256,
339 "vpminsw %%ymm6, %%ymm8, %%ymm7",
340 "vpminsw (%%rax), %%ymm8, %%ymm7")
341
342 GEN_test_RandM(VPMAXUB_256,
343 "vpmaxub %%ymm6, %%ymm8, %%ymm7",
344 "vpmaxub (%%rax), %%ymm8, %%ymm7")
345
346 GEN_test_RandM(VPMINUB_256,
347 "vpminub %%ymm6, %%ymm8, %%ymm7",
348 "vpminub (%%rax), %%ymm8, %%ymm7")
349
350 GEN_test_RandM(VPMAXSB_256,
351 "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
352 "vpmaxsb (%%rax), %%ymm8, %%ymm7")
353
354 GEN_test_RandM(VPMINSB_256,
355 "vpminsb %%ymm6, %%ymm8, %%ymm7",
356 "vpminsb (%%rax), %%ymm8, %%ymm7")
357
358 GEN_test_RandM(VPMOVSXBW_256,
359 "vpmovsxbw %%xmm6, %%ymm8",
360 "vpmovsxbw (%%rax), %%ymm8")
361
362 GEN_test_RandM(VPSUBUSW_256,
363 "vpsubusw %%ymm9, %%ymm8, %%ymm7",
364 "vpsubusw (%%rax), %%ymm8, %%ymm7")
365
366 GEN_test_RandM(VPSUBSW_256,
367 "vpsubsw %%ymm9, %%ymm8, %%ymm7",
368 "vpsubsw (%%rax), %%ymm8, %%ymm7")
369
370 GEN_test_RandM(VPCMPEQW_256,
371 "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
372 "vpcmpeqw (%%rax), %%ymm8, %%ymm7")
373
374 GEN_test_RandM(VPADDB_256,
375 "vpaddb %%ymm6, %%ymm8, %%ymm7",
376 "vpaddb (%%rax), %%ymm8, %%ymm7")
377
378 GEN_test_RandM(VPUNPCKHDQ_256,
379 "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
380 "vpunpckhdq (%%rax), %%ymm8, %%ymm7")
381
382 GEN_test_RandM(VPMOVSXDQ_256,
383 "vpmovsxdq %%xmm6, %%ymm8",
384 "vpmovsxdq (%%rax), %%ymm8")
385
386 GEN_test_RandM(VPMOVSXWD_256,
387 "vpmovsxwd %%xmm6, %%ymm8",
388 "vpmovsxwd (%%rax), %%ymm8")
389
390 GEN_test_RandM(VPMULHW_256,
391 "vpmulhw %%ymm9, %%ymm8, %%ymm7",
392 "vpmulhw (%%rax), %%ymm8, %%ymm7")
393
394 GEN_test_RandM(VPUNPCKHQDQ_256,
395 "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
396 "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")
397
398 GEN_test_Ronly(VPSRAW_0x05_256,
399 "vpsraw $0x5, %%ymm9, %%ymm7")
400
401 GEN_test_RandM(VPCMPGTB_256,
402 "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
403 "vpcmpgtb (%%rax), %%ymm8, %%ymm7")
404
405 GEN_test_RandM(VPCMPGTW_256,
406 "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
407 "vpcmpgtw (%%rax), %%ymm8, %%ymm7")
408
409 GEN_test_RandM(VPCMPGTD_256,
410 "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
411 "vpcmpgtd (%%rax), %%ymm8, %%ymm7")
412
413 GEN_test_RandM(VPMOVZXBD_256,
414 "vpmovzxbd %%xmm6, %%ymm8",
415 "vpmovzxbd (%%rax), %%ymm8")
416
417 GEN_test_RandM(VPMOVSXBD_256,
418 "vpmovsxbd %%xmm6, %%ymm8",
419 "vpmovsxbd (%%rax), %%ymm8")
420
421 GEN_test_RandM(VPALIGNR_256_1of3,
422 "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
423 "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
424 GEN_test_RandM(VPALIGNR_256_2of3,
425 "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
426 "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
427 GEN_test_RandM(VPALIGNR_256_3of3,
428 "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
429 "vpalignr $15, (%%rax), %%ymm8, %%ymm7")
430
431 GEN_test_RandM(VPBLENDW_256_0x00,
432 "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
433 "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
434 GEN_test_RandM(VPBLENDW_256_0xFE,
435 "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
436 "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
437 GEN_test_RandM(VPBLENDW_256_0x30,
438 "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
439 "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
440 GEN_test_RandM(VPBLENDW_256_0x21,
441 "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
442 "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
443 GEN_test_RandM(VPBLENDW_256_0xD7,
444 "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
445 "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
446 GEN_test_RandM(VPBLENDW_256_0xB5,
447 "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
448 "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
449 GEN_test_RandM(VPBLENDW_256_0x85,
450 "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
451 "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
452 GEN_test_RandM(VPBLENDW_256_0x29,
453 "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
454 "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")
455
456 GEN_test_RandM(VPSLLW_256,
457 "andl $15, %%r14d;"
458 "vmovd %%r14d, %%xmm6;"
459 "vpsllw %%xmm6, %%ymm8, %%ymm9",
460 "andq $15, 128(%%rax);"
461 "vpsllw 128(%%rax), %%ymm8, %%ymm9")
462
463 GEN_test_RandM(VPSRLW_256,
464 "andl $15, %%r14d;"
465 "vmovd %%r14d, %%xmm6;"
466 "vpsrlw %%xmm6, %%ymm8, %%ymm9",
467 "andq $15, 128(%%rax);"
468 "vpsrlw 128(%%rax), %%ymm8, %%ymm9")
469
470 GEN_test_RandM(VPSRAW_256,
471 "andl $31, %%r14d;"
472 "vmovd %%r14d, %%xmm6;"
473 "vpsraw %%xmm6, %%ymm8, %%ymm9",
474 "andq $15, 128(%%rax);"
475 "vpsraw 128(%%rax), %%ymm8, %%ymm9")
476
477 GEN_test_RandM(VPSLLD_256,
478 "andl $31, %%r14d;"
479 "vmovd %%r14d, %%xmm6;"
480 "vpslld %%xmm6, %%ymm8, %%ymm9",
481 "andq $31, 128(%%rax);"
482 "vpslld 128(%%rax), %%ymm8, %%ymm9")
483
484 GEN_test_RandM(VPSRLD_256,
485 "andl $31, %%r14d;"
486 "vmovd %%r14d, %%xmm6;"
487 "vpsrld %%xmm6, %%ymm8, %%ymm9",
488 "andq $31, 128(%%rax);"
489 "vpsrld 128(%%rax), %%ymm8, %%ymm9")
490
491 GEN_test_RandM(VPSRAD_256,
492 "andl $31, %%r14d;"
493 "vmovd %%r14d, %%xmm6;"
494 "vpsrad %%xmm6, %%ymm8, %%ymm9",
495 "andq $31, 128(%%rax);"
496 "vpsrad 128(%%rax), %%ymm8, %%ymm9")
497
498 GEN_test_RandM(VPSLLQ_256,
499 "andl $63, %%r14d;"
500 "vmovd %%r14d, %%xmm6;"
501 "vpsllq %%xmm6, %%ymm8, %%ymm9",
502 "andq $63, 128(%%rax);"
503 "vpsllq 128(%%rax), %%ymm8, %%ymm9")
504
505 GEN_test_RandM(VPSRLQ_256,
506 "andl $63, %%r14d;"
507 "vmovd %%r14d, %%xmm6;"
508 "vpsrlq %%xmm6, %%ymm8, %%ymm9",
509 "andq $63, 128(%%rax);"
510 "vpsrlq 128(%%rax), %%ymm8, %%ymm9")
511
512 GEN_test_RandM(VPMADDWD_256,
513 "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
514 "vpmaddwd (%%rax), %%ymm8, %%ymm7")
515
516 GEN_test_Monly(VMOVNTDQA_256,
517 "vmovntdqa (%%rax), %%ymm9")
518
519 GEN_test_RandM(VPACKSSWB_256,
520 "vpacksswb %%ymm6, %%ymm8, %%ymm7",
521 "vpacksswb (%%rax), %%ymm8, %%ymm7")
522
523 GEN_test_RandM(VPAVGB_256,
524 "vpavgb %%ymm6, %%ymm8, %%ymm7",
525 "vpavgb (%%rax), %%ymm8, %%ymm7")
526
527 GEN_test_RandM(VPAVGW_256,
528 "vpavgw %%ymm6, %%ymm8, %%ymm7",
529 "vpavgw (%%rax), %%ymm8, %%ymm7")
530
531 GEN_test_RandM(VPADDSB_256,
532 "vpaddsb %%ymm6, %%ymm8, %%ymm7",
533 "vpaddsb (%%rax), %%ymm8, %%ymm7")
534
535 GEN_test_RandM(VPADDSW_256,
536 "vpaddsw %%ymm6, %%ymm8, %%ymm7",
537 "vpaddsw (%%rax), %%ymm8, %%ymm7")
538
539 GEN_test_RandM(VPHADDW_256,
540 "vphaddw %%ymm6, %%ymm8, %%ymm7",
541 "vphaddw (%%rax), %%ymm8, %%ymm7")
542
543 GEN_test_RandM(VPHADDD_256,
544 "vphaddd %%ymm6, %%ymm8, %%ymm7",
545 "vphaddd (%%rax), %%ymm8, %%ymm7")
546
547 GEN_test_RandM(VPHADDSW_256,
548 "vphaddsw %%ymm6, %%ymm8, %%ymm7",
549 "vphaddsw (%%rax), %%ymm8, %%ymm7")
550
551 GEN_test_RandM(VPMADDUBSW_256,
552 "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
553 "vpmaddubsw (%%rax), %%ymm8, %%ymm7")
554
555 GEN_test_RandM(VPHSUBW_256,
556 "vphsubw %%ymm6, %%ymm8, %%ymm7",
557 "vphsubw (%%rax), %%ymm8, %%ymm7")
558
559 GEN_test_RandM(VPHSUBD_256,
560 "vphsubd %%ymm6, %%ymm8, %%ymm7",
561 "vphsubd (%%rax), %%ymm8, %%ymm7")
562
563 GEN_test_RandM(VPHSUBSW_256,
564 "vphsubsw %%ymm6, %%ymm8, %%ymm7",
565 "vphsubsw (%%rax), %%ymm8, %%ymm7")
566
567 GEN_test_RandM(VPABSB_256,
568 "vpabsb %%ymm6, %%ymm7",
569 "vpabsb (%%rax), %%ymm7")
570
571 GEN_test_RandM(VPABSW_256,
572 "vpabsw %%ymm6, %%ymm7",
573 "vpabsw (%%rax), %%ymm7")
574
575 GEN_test_RandM(VPMOVSXBQ_256,
576 "vpmovsxbq %%xmm6, %%ymm8",
577 "vpmovsxbq (%%rax), %%ymm8")
578
579 GEN_test_RandM(VPMOVSXWQ_256,
580 "vpmovsxwq %%xmm6, %%ymm8",
581 "vpmovsxwq (%%rax), %%ymm8")
582
583 GEN_test_RandM(VPACKUSDW_256,
584 "vpackusdw %%ymm6, %%ymm8, %%ymm7",
585 "vpackusdw (%%rax), %%ymm8, %%ymm7")
586
587 GEN_test_RandM(VPMOVZXBQ_256,
588 "vpmovzxbq %%xmm6, %%ymm8",
589 "vpmovzxbq (%%rax), %%ymm8")
590
591 GEN_test_RandM(VPMOVZXWQ_256,
592 "vpmovzxwq %%xmm6, %%ymm8",
593 "vpmovzxwq (%%rax), %%ymm8")
594
595 GEN_test_RandM(VPMOVZXDQ_256,
596 "vpmovzxdq %%xmm6, %%ymm8",
597 "vpmovzxdq (%%rax), %%ymm8")
598
599 GEN_test_RandM(VMPSADBW_256_0x0,
600 "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
601 "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
602 GEN_test_RandM(VMPSADBW_256_0x39,
603 "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
604 "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
605 GEN_test_RandM(VMPSADBW_256_0x32,
606 "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
607 "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
608 GEN_test_RandM(VMPSADBW_256_0x2b,
609 "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
610 "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
611 GEN_test_RandM(VMPSADBW_256_0x24,
612 "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
613 "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
614 GEN_test_RandM(VMPSADBW_256_0x1d,
615 "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
616 "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
617 GEN_test_RandM(VMPSADBW_256_0x16,
618 "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
619 "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
620 GEN_test_RandM(VMPSADBW_256_0x0f,
621 "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
622 "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")
623
624 GEN_test_RandM(VPSADBW_256,
625 "vpsadbw %%ymm6, %%ymm8, %%ymm7",
626 "vpsadbw (%%rax), %%ymm8, %%ymm7")
627
628 GEN_test_RandM(VPSIGNB_256,
629 "vpsignb %%ymm6, %%ymm8, %%ymm7",
630 "vpsignb (%%rax), %%ymm8, %%ymm7")
631
632 GEN_test_RandM(VPSIGNW_256,
633 "vpsignw %%ymm6, %%ymm8, %%ymm7",
634 "vpsignw (%%rax), %%ymm8, %%ymm7")
635
636 GEN_test_RandM(VPSIGND_256,
637 "vpsignd %%ymm6, %%ymm8, %%ymm7",
638 "vpsignd (%%rax), %%ymm8, %%ymm7")
639
640 GEN_test_RandM(VPMULHRSW_256,
641 "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
642 "vpmulhrsw (%%rax), %%ymm8, %%ymm7")
643
644 /* Instructions new in AVX2. */
645
646 GEN_test_Monly(VBROADCASTI128,
647 "vbroadcasti128 (%%rax), %%ymm9")
648
649 GEN_test_RandM(VEXTRACTI128_0x0,
650 "vextracti128 $0x0, %%ymm7, %%xmm9",
651 "vextracti128 $0x0, %%ymm7, (%%rax)")
652
653 GEN_test_RandM(VEXTRACTI128_0x1,
654 "vextracti128 $0x1, %%ymm7, %%xmm9",
655 "vextracti128 $0x1, %%ymm7, (%%rax)")
656
657 GEN_test_RandM(VINSERTI128_0x0,
658 "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
659 "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")
660
661 GEN_test_RandM(VINSERTI128_0x1,
662 "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
663 "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")
664
665 GEN_test_RandM(VPERM2I128_0x00,
666 "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
667 "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
668 GEN_test_RandM(VPERM2I128_0xFF,
669 "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
670 "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
671 GEN_test_RandM(VPERM2I128_0x30,
672 "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
673 "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
674 GEN_test_RandM(VPERM2I128_0x21,
675 "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
676 "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
677 GEN_test_RandM(VPERM2I128_0x12,
678 "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
679 "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
680 GEN_test_RandM(VPERM2I128_0x03,
681 "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
682 "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
683 GEN_test_RandM(VPERM2I128_0x85,
684 "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
685 "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
686 GEN_test_RandM(VPERM2I128_0x5A,
687 "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
688 "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")
689
690 GEN_test_Ronly(VBROADCASTSS_128,
691 "vbroadcastss %%xmm9, %%xmm7")
692
693 GEN_test_Ronly(VBROADCASTSS_256,
694 "vbroadcastss %%xmm9, %%ymm7")
695
696 GEN_test_Ronly(VBROADCASTSD_256,
697 "vbroadcastsd %%xmm9, %%ymm7")
698
699 GEN_test_RandM(VPERMD,
700 "vpermd %%ymm6, %%ymm7, %%ymm9",
701 "vpermd (%%rax), %%ymm7, %%ymm9")
702
703 GEN_test_RandM(VPERMQ_0x00,
704 "vpermq $0x00, %%ymm6, %%ymm7",
705 "vpermq $0x01, (%%rax), %%ymm7")
706 GEN_test_RandM(VPERMQ_0xFE,
707 "vpermq $0xFE, %%ymm6, %%ymm7",
708 "vpermq $0xFF, (%%rax), %%ymm7")
709 GEN_test_RandM(VPERMQ_0x30,
710 "vpermq $0x30, %%ymm6, %%ymm7",
711 "vpermq $0x03, (%%rax), %%ymm7")
712 GEN_test_RandM(VPERMQ_0x21,
713 "vpermq $0x21, %%ymm6, %%ymm7",
714 "vpermq $0x12, (%%rax), %%ymm7")
715 GEN_test_RandM(VPERMQ_0xD7,
716 "vpermq $0xD7, %%ymm6, %%ymm7",
717 "vpermq $0x6C, (%%rax), %%ymm7")
718 GEN_test_RandM(VPERMQ_0xB5,
719 "vpermq $0xB5, %%ymm6, %%ymm7",
720 "vpermq $0x4A, (%%rax), %%ymm7")
721 GEN_test_RandM(VPERMQ_0x85,
722 "vpermq $0x85, %%ymm6, %%ymm7",
723 "vpermq $0xDC, (%%rax), %%ymm7")
724 GEN_test_RandM(VPERMQ_0x29,
725 "vpermq $0x29, %%ymm6, %%ymm7",
726 "vpermq $0x92, (%%rax), %%ymm7")
727
728 GEN_test_RandM(VPERMPS,
729 "vpermps %%ymm6, %%ymm7, %%ymm9",
730 "vpermps (%%rax), %%ymm7, %%ymm9")
731
732 GEN_test_RandM(VPERMPD_0x00,
733 "vpermpd $0x00, %%ymm6, %%ymm7",
734 "vpermpd $0x01, (%%rax), %%ymm7")
735 GEN_test_RandM(VPERMPD_0xFE,
736 "vpermpd $0xFE, %%ymm6, %%ymm7",
737 "vpermpd $0xFF, (%%rax), %%ymm7")
738 GEN_test_RandM(VPERMPD_0x30,
739 "vpermpd $0x30, %%ymm6, %%ymm7",
740 "vpermpd $0x03, (%%rax), %%ymm7")
741 GEN_test_RandM(VPERMPD_0x21,
742 "vpermpd $0x21, %%ymm6, %%ymm7",
743 "vpermpd $0x12, (%%rax), %%ymm7")
744 GEN_test_RandM(VPERMPD_0xD7,
745 "vpermpd $0xD7, %%ymm6, %%ymm7",
746 "vpermpd $0x6C, (%%rax), %%ymm7")
747 GEN_test_RandM(VPERMPD_0xB5,
748 "vpermpd $0xB5, %%ymm6, %%ymm7",
749 "vpermpd $0x4A, (%%rax), %%ymm7")
750 GEN_test_RandM(VPERMPD_0x85,
751 "vpermpd $0x85, %%ymm6, %%ymm7",
752 "vpermpd $0xDC, (%%rax), %%ymm7")
753 GEN_test_RandM(VPERMPD_0x29,
754 "vpermpd $0x29, %%ymm6, %%ymm7",
755 "vpermpd $0x92, (%%rax), %%ymm7")
756
757 GEN_test_RandM(VPBLENDD_128_0x00,
758 "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7",
759 "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")
760 GEN_test_RandM(VPBLENDD_128_0x02,
761 "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7",
762 "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
763 GEN_test_RandM(VPBLENDD_128_0x04,
764 "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7",
765 "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
766 GEN_test_RandM(VPBLENDD_128_0x06,
767 "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7",
768 "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
769 GEN_test_RandM(VPBLENDD_128_0x08,
770 "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7",
771 "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
772 GEN_test_RandM(VPBLENDD_128_0x0A,
773 "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7",
774 "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
775 GEN_test_RandM(VPBLENDD_128_0x0C,
776 "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7",
777 "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
778 GEN_test_RandM(VPBLENDD_128_0x0E,
779 "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7",
780 "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")
781
782 GEN_test_RandM(VPBLENDD_256_0x00,
783 "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7",
784 "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
785 GEN_test_RandM(VPBLENDD_256_0xFE,
786 "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7",
787 "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
788 GEN_test_RandM(VPBLENDD_256_0x30,
789 "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7",
790 "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
791 GEN_test_RandM(VPBLENDD_256_0x21,
792 "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7",
793 "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
794 GEN_test_RandM(VPBLENDD_256_0xD7,
795 "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7",
796 "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
797 GEN_test_RandM(VPBLENDD_256_0xB5,
798 "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7",
799 "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
800 GEN_test_RandM(VPBLENDD_256_0x85,
801 "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7",
802 "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
803 GEN_test_RandM(VPBLENDD_256_0x29,
804 "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7",
805 "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")
806
807 GEN_test_RandM(VPSLLVD_128,
808 "vpslld $27, %%xmm6, %%xmm6;"
809 "vpsrld $27, %%xmm6, %%xmm6;"
810 "vpsllvd %%xmm6, %%xmm8, %%xmm7",
811 "andl $31, (%%rax);"
812 "andl $31, 4(%%rax);"
813 "andl $31, 8(%%rax);"
814 "vpsllvd (%%rax), %%xmm8, %%xmm7")
815
816 GEN_test_RandM(VPSLLVD_256,
817 "vpslld $27, %%ymm6, %%ymm6;"
818 "vpsrld $27, %%ymm6, %%ymm6;"
819 "vpsllvd %%ymm6, %%ymm8, %%ymm7",
820 "andl $31, (%%rax);"
821 "andl $31, 4(%%rax);"
822 "andl $31, 8(%%rax);"
823 "andl $31, 16(%%rax);"
824 "andl $31, 20(%%rax);"
825 "andl $31, 24(%%rax);"
826 "vpsllvd (%%rax), %%ymm8, %%ymm7")
827
828 GEN_test_RandM(VPSLLVQ_128,
829 "vpsllq $58, %%xmm6, %%xmm6;"
830 "vpsrlq $58, %%xmm6, %%xmm6;"
831 "vpsllvq %%xmm6, %%xmm8, %%xmm7",
832 "andl $63, (%%rax);"
833 "vpsllvq (%%rax), %%xmm8, %%xmm7")
834
835 GEN_test_RandM(VPSLLVQ_256,
836 "vpsllq $58, %%ymm6, %%ymm6;"
837 "vpsrlq $58, %%ymm6, %%ymm6;"
838 "vpsllvq %%ymm6, %%ymm8, %%ymm7",
839 "andl $63, (%%rax);"
840 "andl $63, 8(%%rax);"
841 "andl $63, 16(%%rax);"
842 "vpsllvq (%%rax), %%ymm8, %%ymm7")
843
844 GEN_test_RandM(VPSRLVD_128,
845 "vpslld $27, %%xmm6, %%xmm6;"
846 "vpsrld $27, %%xmm6, %%xmm6;"
847 "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
848 "andl $31, (%%rax);"
849 "andl $31, 4(%%rax);"
850 "andl $31, 8(%%rax);"
851 "vpsrlvd (%%rax), %%xmm8, %%xmm7")
852
853 GEN_test_RandM(VPSRLVD_256,
854 "vpslld $27, %%ymm6, %%ymm6;"
855 "vpsrld $27, %%ymm6, %%ymm6;"
856 "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
857 "andl $31, (%%rax);"
858 "andl $31, 4(%%rax);"
859 "andl $31, 8(%%rax);"
860 "andl $31, 16(%%rax);"
861 "andl $31, 20(%%rax);"
862 "andl $31, 24(%%rax);"
863 "vpsrlvd (%%rax), %%ymm8, %%ymm7")
864
865 GEN_test_RandM(VPSRLVQ_128,
866 "vpsllq $58, %%xmm6, %%xmm6;"
867 "vpsrlq $58, %%xmm6, %%xmm6;"
868 "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
869 "andl $63, (%%rax);"
870 "vpsrlvq (%%rax), %%xmm8, %%xmm7")
871
872 GEN_test_RandM(VPSRLVQ_256,
873 "vpsllq $58, %%ymm6, %%ymm6;"
874 "vpsrlq $58, %%ymm6, %%ymm6;"
875 "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
876 "andl $63, (%%rax);"
877 "andl $63, 8(%%rax);"
878 "andl $63, 16(%%rax);"
879 "vpsrlvq (%%rax), %%ymm8, %%ymm7")
880
881 GEN_test_RandM(VPSRAVD_128,
882 "vpslld $27, %%xmm6, %%xmm6;"
883 "vpsrld $27, %%xmm6, %%xmm6;"
884 "vpsravd %%xmm6, %%xmm8, %%xmm7",
885 "andl $31, (%%rax);"
886 "andl $31, 4(%%rax);"
887 "andl $31, 8(%%rax);"
888 "vpsravd (%%rax), %%xmm8, %%xmm7")
889
890 GEN_test_RandM(VPSRAVD_256,
891 "vpslld $27, %%ymm6, %%ymm6;"
892 "vpsrld $27, %%ymm6, %%ymm6;"
893 "vpsravd %%ymm6, %%ymm8, %%ymm7",
894 "andl $31, (%%rax);"
895 "andl $31, 4(%%rax);"
896 "andl $31, 8(%%rax);"
897 "andl $31, 16(%%rax);"
898 "andl $31, 20(%%rax);"
899 "andl $31, 24(%%rax);"
900 "vpsravd (%%rax), %%ymm8, %%ymm7")
901
902 GEN_test_RandM(VPBROADCASTB_128,
903 "vpbroadcastb %%xmm9, %%xmm7",
904 "vpbroadcastb (%%rax), %%xmm7")
905
906 GEN_test_RandM(VPBROADCASTB_256,
907 "vpbroadcastb %%xmm9, %%ymm7",
908 "vpbroadcastb (%%rax), %%ymm7")
909
910 GEN_test_RandM(VPBROADCASTW_128,
911 "vpbroadcastw %%xmm9, %%xmm7",
912 "vpbroadcastw (%%rax), %%xmm7")
913
914 GEN_test_RandM(VPBROADCASTW_256,
915 "vpbroadcastw %%xmm9, %%ymm7",
916 "vpbroadcastw (%%rax), %%ymm7")
917
918 GEN_test_RandM(VPBROADCASTD_128,
919 "vpbroadcastd %%xmm9, %%xmm7",
920 "vpbroadcastd (%%rax), %%xmm7")
921
922 GEN_test_RandM(VPBROADCASTD_256,
923 "vpbroadcastd %%xmm9, %%ymm7",
924 "vpbroadcastd (%%rax), %%ymm7")
925
926 GEN_test_RandM(VPBROADCASTQ_128,
927 "vpbroadcastq %%xmm9, %%xmm7",
928 "vpbroadcastq (%%rax), %%xmm7")
929
930 GEN_test_RandM(VPBROADCASTQ_256,
931 "vpbroadcastq %%xmm9, %%ymm7",
932 "vpbroadcastq (%%rax), %%ymm7")
933
934 GEN_test_Monly(VPMASKMOVD_128_LoadForm,
935 "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
936 "vxorps %%xmm6, %%xmm6, %%xmm6;"
937 "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")
938
939 GEN_test_Monly(VPMASKMOVD_256_LoadForm,
940 "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
941 "vxorps %%ymm6, %%ymm6, %%ymm6;"
942 "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")
943
944 GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
945 "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
946 "vxorpd %%xmm6, %%xmm6, %%xmm6;"
947 "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")
948
949 GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
950 "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
951 "vxorpd %%ymm6, %%ymm6, %%ymm6;"
952 "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")
953
954 GEN_test_Monly(VPMASKMOVD_128_StoreForm,
955 "vpmaskmovd %%xmm8, %%xmm7, (%%rax);"
956 "vxorps %%xmm6, %%xmm6, %%xmm6;"
957 "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)")
958
959 GEN_test_Monly(VPMASKMOVD_256_StoreForm,
960 "vpmaskmovd %%ymm8, %%ymm7, (%%rax);"
961 "vxorps %%ymm6, %%ymm6, %%ymm6;"
962 "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)")
963
964 GEN_test_Monly(VPMASKMOVQ_128_StoreForm,
965 "vpmaskmovq %%xmm8, %%xmm7, (%%rax);"
966 "vxorpd %%xmm6, %%xmm6, %%xmm6;"
967 "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)")
968
969 GEN_test_Monly(VPMASKMOVQ_256_StoreForm,
970 "vpmaskmovq %%ymm8, %%ymm7, (%%rax);"
971 "vxorpd %%ymm6, %%ymm6, %%ymm6;"
972 "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)")
973
974 GEN_test_Ronly(VGATHERDPS_128,
975 "vpslld $25, %%xmm7, %%xmm8;"
976 "vpsrld $25, %%xmm8, %%xmm8;"
977 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
978 "leaq randArray(%%rip), %%r14;"
979 "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
980 "xorl %%r14d, %%r14d")
981
982 GEN_test_Ronly(VGATHERDPS_256,
983 "vpslld $25, %%ymm7, %%ymm8;"
984 "vpsrld $25, %%ymm8, %%ymm8;"
985 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
986 "leaq randArray(%%rip), %%r14;"
987 "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
988 "xorl %%r14d, %%r14d")
989
/* VGATHERQPS: gather 32-bit floats using 64-bit (qword) indices.
   The "_1" forms address off r14 (= &randArray); the "_2" forms fold
   the base address into the index vector and use a base-less address.
   In every form sllq/srlq by 57 keeps only the low 7 bits of each
   qword index (0..127), and a blend under the gather mask clamps
   exactly the lanes the gather loads, so all accesses stay inside
   randArray.  Qword indices yield half as many dword results, hence
   the xmm destination / mask even in the 256-bit-index forms. */
GEN_test_Ronly(VGATHERQPS_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               /* widen the dword mask in xmm6 to qword lanes so the
                  blend covers exactly the gathered lanes */
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               /* restore ymm9 from the block's a4 field (offset 96) */
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               /* build absolute addresses in the index register:
                  pre-scale indices by 4, broadcast the base and add */
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               /* undo the base add and restore ymm7 from the block's
                  a1 so the dumped state is address-independent */
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
1041
/* VGATHERDPD: gather 64-bit doubles using 32-bit (dword) indices off
   r14.  slld/srld by 26 keeps the low 6 bits of each dword lane
   (indices 0..63; scale 8 keeps all accesses inside randArray).  The
   vshufps step derives a dword-lane mask from the qword gather mask
   in ymm6/xmm6 so the blend clamps exactly the gathered lanes. */
GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               /* restore ymm9 from the block's a4 field (offset 96) */
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")
1062
/* VGATHERQPD: gather 64-bit doubles using 64-bit indices.  sllq/srlq
   by 58 keeps the low 6 bits of each qword index (0..63).  "_1" forms
   index off r14; "_2" forms fold the base into the index vector (the
   indices are pre-scaled by 4 and the gather uses scale 1). */
GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               /* absolute-address form: scale indices, add base */
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               /* undo the base add; reload ymm7 from block a1 so the
                  dumped state is address-independent */
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
1106
/* VPGATHERDD: gather 32-bit ints with dword indices off r14.
   slld/srld by 25 keeps the low 7 bits of each dword lane (indices
   0..127; scale 4, disp 3 stays inside randArray's 1027 bytes); the
   blend under mask ymm6/xmm6 clamps exactly the gathered lanes. */
GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")
1122
/* VPGATHERQD: gather 32-bit ints with qword indices (sllq/srlq by 57
   clamps each index to 0..127).  "_1" forms index off r14; "_2" forms
   use absolute addresses folded into the index vector.  Qword indices
   yield half as many dword results, hence xmm destination/mask. */
GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               /* widen the dword mask to qword lanes for the blend */
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               /* restore ymm9 from the block's a4 field (offset 96) */
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               /* absolute-address form: scale indices by 4, add base */
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               /* undo the base add; reload ymm7 from block a1 so the
                  dumped state is address-independent */
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
1174
/* VPGATHERDQ: gather 64-bit ints with dword indices off r14.
   slld/srld by 26 keeps the low 6 bits of each dword lane (indices
   0..63; scale 8 stays inside randArray).  The vshufps step derives
   a dword-lane mask from the qword gather mask for the blend. */
GEN_test_Ronly(VPGATHERDQ_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               /* restore ymm9 from the block's a4 field (offset 96) */
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")
1195
/* VPGATHERQQ: gather 64-bit ints with qword indices.  sllq/srlq by
   58 clamps each index to 0..63.  "_1" forms index off r14; "_2"
   forms fold the base address into the index vector (indices are
   pre-scaled by 4, gather uses scale 1). */
GEN_test_Ronly(VPGATHERQQ_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               /* clear r14: it held randArray's address */
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               /* absolute-address form: scale indices, add base */
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               /* undo the base add; reload ymm7 from block a1 so the
                  dumped state is address-independent */
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")
1239
1240 /* Comment duplicated above, for convenient reference:
1241 Allowed operands in test insns:
1242 Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14.
1243 Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
1244 Imm8 etc fields are also allowed, where they make sense.
1245 Both forms may use ymm0 as scratch. Mem form may also use
1246 ymm6 as scratch.
1247 */
1248
/* How many times DO_D runs each test. */
#define N_DEFAULT_ITERS 3

// Do the specified test some number of times
#define DO_N(_iters, _testfn) \
   do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)

// Do the specified test the default number of times
#define DO_D(_testfn)  DO_N(N_DEFAULT_ITERS, _testfn)
1257
1258
/* Run every AVX2 test in a fixed order.  The order matters: the
   program's stdout (one state dump per test iteration) is compared
   against a reference output, so reordering changes the "golden"
   file even when all instructions behave correctly. */
int main ( void )
{
   /* Integer ALU, shuffle, pack/unpack, blend, shift, broadcast,
      permute and maskmov tests — each run N_DEFAULT_ITERS times on
      freshly randomised Block data. */
   DO_D( VPOR_256 );
   DO_D( VPXOR_256 );
   DO_D( VPSUBB_256 );
   DO_D( VPSUBD_256 );
   DO_D( VPADDD_256 );
   DO_D( VPMOVZXWD_256 );
   DO_D( VPMOVZXBW_256 );
   DO_D( VPBLENDVB_256 );
   DO_D( VPMINSD_256 );
   DO_D( VPMAXSD_256 );
   DO_D( VPSHUFB_256 );
   DO_D( VPUNPCKLBW_256 );
   DO_D( VPUNPCKHBW_256 );
   DO_D( VPABSD_256 );
   DO_D( VPACKUSWB_256 );
   DO_D( VPMOVMSKB_256 );
   DO_D( VPAND_256 );
   DO_D( VPCMPEQB_256 );
   DO_D( VPSHUFLW_0x39_256 );
   DO_D( VPSHUFHW_0x39_256 );
   DO_D( VPMULLW_256 );
   DO_D( VPADDUSW_256 );
   DO_D( VPMULHUW_256 );
   DO_D( VPADDUSB_256 );
   DO_D( VPUNPCKLWD_256 );
   DO_D( VPUNPCKHWD_256 );
   DO_D( VPSLLD_0x05_256 );
   DO_D( VPSRLD_0x05_256 );
   DO_D( VPSRAD_0x05_256 );
   DO_D( VPSUBUSB_256 );
   DO_D( VPSUBSB_256 );
   DO_D( VPSRLDQ_0x05_256 );
   DO_D( VPSLLDQ_0x05_256 );
   DO_D( VPANDN_256 );
   DO_D( VPUNPCKLQDQ_256 );
   DO_D( VPSRLW_0x05_256 );
   DO_D( VPSLLW_0x05_256 );
   DO_D( VPADDW_256 );
   DO_D( VPACKSSDW_256 );
   DO_D( VPUNPCKLDQ_256 );
   DO_D( VPCMPEQD_256 );
   DO_D( VPSHUFD_0x39_256 );
   DO_D( VPADDQ_256 );
   DO_D( VPSUBQ_256 );
   DO_D( VPSUBW_256 );
   DO_D( VPCMPEQQ_256 );
   DO_D( VPCMPGTQ_256 );
   DO_D( VPSRLQ_0x05_256 );
   DO_D( VPMULUDQ_256 );
   DO_D( VPMULDQ_256 );
   DO_D( VPSLLQ_0x05_256 );
   DO_D( VPMAXUD_256 );
   DO_D( VPMINUD_256 );
   DO_D( VPMULLD_256 );
   DO_D( VPMAXUW_256 );
   DO_D( VPMINUW_256 );
   DO_D( VPMAXSW_256 );
   DO_D( VPMINSW_256 );
   DO_D( VPMAXUB_256 );
   DO_D( VPMINUB_256 );
   DO_D( VPMAXSB_256 );
   DO_D( VPMINSB_256 );
   DO_D( VPMOVSXBW_256 );
   DO_D( VPSUBUSW_256 );
   DO_D( VPSUBSW_256 );
   DO_D( VPCMPEQW_256 );
   DO_D( VPADDB_256 );
   DO_D( VPUNPCKHDQ_256 );
   DO_D( VPMOVSXDQ_256 );
   DO_D( VPMOVSXWD_256 );
   DO_D( VPMULHW_256 );
   DO_D( VPUNPCKHQDQ_256 );
   DO_D( VPSRAW_0x05_256 );
   DO_D( VPCMPGTB_256 );
   DO_D( VPCMPGTW_256 );
   DO_D( VPCMPGTD_256 );
   DO_D( VPMOVZXBD_256 );
   DO_D( VPMOVSXBD_256 );
   DO_D( VPALIGNR_256_1of3 );
   DO_D( VPALIGNR_256_2of3 );
   DO_D( VPALIGNR_256_3of3 );
   DO_D( VPBLENDW_256_0x00 );
   DO_D( VPBLENDW_256_0xFE );
   DO_D( VPBLENDW_256_0x30 );
   DO_D( VPBLENDW_256_0x21 );
   DO_D( VPBLENDW_256_0xD7 );
   DO_D( VPBLENDW_256_0xB5 );
   DO_D( VPBLENDW_256_0x85 );
   DO_D( VPBLENDW_256_0x29 );
   DO_D( VPSLLW_256 );
   DO_D( VPSRLW_256 );
   DO_D( VPSRAW_256 );
   DO_D( VPSLLD_256 );
   DO_D( VPSRLD_256 );
   DO_D( VPSRAD_256 );
   DO_D( VPSLLQ_256 );
   DO_D( VPSRLQ_256 );
   DO_D( VPMADDWD_256 );
   DO_D( VMOVNTDQA_256 );
   DO_D( VPACKSSWB_256 );
   DO_D( VPAVGB_256 );
   DO_D( VPAVGW_256 );
   DO_D( VPADDSB_256 );
   DO_D( VPADDSW_256 );
   DO_D( VPHADDW_256 );
   DO_D( VPHADDD_256 );
   DO_D( VPHADDSW_256 );
   DO_D( VPMADDUBSW_256 );
   DO_D( VPHSUBW_256 );
   DO_D( VPHSUBD_256 );
   DO_D( VPHSUBSW_256 );
   DO_D( VPABSB_256 );
   DO_D( VPABSW_256 );
   DO_D( VPMOVSXBQ_256 );
   DO_D( VPMOVSXWQ_256 );
   DO_D( VPACKUSDW_256 );
   DO_D( VPMOVZXBQ_256 );
   DO_D( VPMOVZXWQ_256 );
   DO_D( VPMOVZXDQ_256 );
   DO_D( VMPSADBW_256_0x0 );
   DO_D( VMPSADBW_256_0x39 );
   DO_D( VMPSADBW_256_0x32 );
   DO_D( VMPSADBW_256_0x2b );
   DO_D( VMPSADBW_256_0x24 );
   DO_D( VMPSADBW_256_0x1d );
   DO_D( VMPSADBW_256_0x16 );
   DO_D( VMPSADBW_256_0x0f );
   DO_D( VPSADBW_256 );
   DO_D( VPSIGNB_256 );
   DO_D( VPSIGNW_256 );
   DO_D( VPSIGND_256 );
   DO_D( VPMULHRSW_256 );
   DO_D( VBROADCASTI128 );
   DO_D( VEXTRACTI128_0x0 );
   DO_D( VEXTRACTI128_0x1 );
   DO_D( VINSERTI128_0x0 );
   DO_D( VINSERTI128_0x1 );
   DO_D( VPERM2I128_0x00 );
   DO_D( VPERM2I128_0xFF );
   DO_D( VPERM2I128_0x30 );
   DO_D( VPERM2I128_0x21 );
   DO_D( VPERM2I128_0x12 );
   DO_D( VPERM2I128_0x03 );
   DO_D( VPERM2I128_0x85 );
   DO_D( VPERM2I128_0x5A );
   DO_D( VBROADCASTSS_128 );
   DO_D( VBROADCASTSS_256 );
   DO_D( VBROADCASTSD_256 );
   DO_D( VPERMD );
   DO_D( VPERMQ_0x00 );
   DO_D( VPERMQ_0xFE );
   DO_D( VPERMQ_0x30 );
   DO_D( VPERMQ_0x21 );
   DO_D( VPERMQ_0xD7 );
   DO_D( VPERMQ_0xB5 );
   DO_D( VPERMQ_0x85 );
   DO_D( VPERMQ_0x29 );
   DO_D( VPERMPS );
   DO_D( VPERMPD_0x00 );
   DO_D( VPERMPD_0xFE );
   DO_D( VPERMPD_0x30 );
   DO_D( VPERMPD_0x21 );
   DO_D( VPERMPD_0xD7 );
   DO_D( VPERMPD_0xB5 );
   DO_D( VPERMPD_0x85 );
   DO_D( VPERMPD_0x29 );
   DO_D( VPBLENDD_128_0x00 );
   DO_D( VPBLENDD_128_0x02 );
   DO_D( VPBLENDD_128_0x04 );
   DO_D( VPBLENDD_128_0x06 );
   DO_D( VPBLENDD_128_0x08 );
   DO_D( VPBLENDD_128_0x0A );
   DO_D( VPBLENDD_128_0x0C );
   DO_D( VPBLENDD_128_0x0E );
   DO_D( VPBLENDD_256_0x00 );
   DO_D( VPBLENDD_256_0xFE );
   DO_D( VPBLENDD_256_0x30 );
   DO_D( VPBLENDD_256_0x21 );
   DO_D( VPBLENDD_256_0xD7 );
   DO_D( VPBLENDD_256_0xB5 );
   DO_D( VPBLENDD_256_0x85 );
   DO_D( VPBLENDD_256_0x29 );
   DO_D( VPSLLVD_128 );
   DO_D( VPSLLVD_256 );
   DO_D( VPSLLVQ_128 );
   DO_D( VPSLLVQ_256 );
   DO_D( VPSRLVD_128 );
   DO_D( VPSRLVD_256 );
   DO_D( VPSRLVQ_128 );
   DO_D( VPSRLVQ_256 );
   DO_D( VPSRAVD_128 );
   DO_D( VPSRAVD_256 );
   DO_D( VPBROADCASTB_128 );
   DO_D( VPBROADCASTB_256 );
   DO_D( VPBROADCASTW_128 );
   DO_D( VPBROADCASTW_256 );
   DO_D( VPBROADCASTD_128 );
   DO_D( VPBROADCASTD_256 );
   DO_D( VPBROADCASTQ_128 );
   DO_D( VPBROADCASTQ_256 );
   DO_D( VPMASKMOVD_128_LoadForm );
   DO_D( VPMASKMOVD_256_LoadForm );
   DO_D( VPMASKMOVQ_128_LoadForm );
   DO_D( VPMASKMOVQ_256_LoadForm );
   DO_D( VPMASKMOVD_128_StoreForm );
   DO_D( VPMASKMOVD_256_StoreForm );
   DO_D( VPMASKMOVQ_128_StoreForm );
   DO_D( VPMASKMOVQ_256_StoreForm );
   /* Fill randArray before the gather tests: every gather below
      clamps its indices so its loads land inside this array.
      NOTE(review): `i` is int vs size_t from sizeof — benign here
      since sizeof(randArray) is 1027. */
   { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
   DO_D( VGATHERDPS_128 );
   DO_D( VGATHERDPS_256 );
   DO_D( VGATHERQPS_128_1 );
   DO_D( VGATHERQPS_256_1 );
   DO_D( VGATHERQPS_128_2 );
   DO_D( VGATHERQPS_256_2 );
   DO_D( VGATHERDPD_128 );
   DO_D( VGATHERDPD_256 );
   DO_D( VGATHERQPD_128_1 );
   DO_D( VGATHERQPD_256_1 );
   DO_D( VGATHERQPD_128_2 );
   DO_D( VGATHERQPD_256_2 );
   DO_D( VPGATHERDD_128 );
   DO_D( VPGATHERDD_256 );
   DO_D( VPGATHERQD_128_1 );
   DO_D( VPGATHERQD_256_1 );
   DO_D( VPGATHERQD_128_2 );
   DO_D( VPGATHERQD_256_2 );
   DO_D( VPGATHERDQ_128 );
   DO_D( VPGATHERDQ_256 );
   DO_D( VPGATHERQQ_128_1 );
   DO_D( VPGATHERQQ_256_1 );
   DO_D( VPGATHERQQ_128_2 );
   DO_D( VPGATHERQQ_256_2 );
   return 0;
}
1496