1 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <assert.h> 5 #include "tests/malloc.h" 6 7 typedef unsigned char UChar; 8 typedef unsigned int UInt; 9 typedef unsigned long int UWord; 10 typedef unsigned long long int ULong; 11 12 #define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr))) 13 14 typedef union { UChar u8[32]; UInt u32[8]; } YMM; 15 16 typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block; 17 18 void showYMM ( YMM* vec ) 19 { 20 int i; 21 assert(IS_32_ALIGNED(vec)); 22 for (i = 31; i >= 0; i--) { 23 printf("%02x", (UInt)vec->u8[i]); 24 if (i > 0 && 0 == ((i+0) & 7)) printf("."); 25 } 26 } 27 28 void showBlock ( char* msg, Block* block ) 29 { 30 printf(" %s\n", msg); 31 printf(" "); showYMM(&block->a1); printf("\n"); 32 printf(" "); showYMM(&block->a2); printf("\n"); 33 printf(" "); showYMM(&block->a3); printf("\n"); 34 printf(" "); showYMM(&block->a4); printf("\n"); 35 printf(" %016llx\n", block->u64); 36 } 37 38 UChar randUChar ( void ) 39 { 40 static UInt seed = 80021; 41 seed = 1103515245 * seed + 12345; 42 return (seed >> 17) & 0xFF; 43 } 44 45 void randBlock ( Block* b ) 46 { 47 int i; 48 UChar* p = (UChar*)b; 49 for (i = 0; i < sizeof(Block); i++) 50 p[i] = randUChar(); 51 } 52 53 54 /* Generate a function test_NAME, that tests the given insn, in both 55 its mem and reg forms. The reg form of the insn may mention, as 56 operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of 57 the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9 58 and %r14. It's OK for the insn to clobber ymm0, as this is needed 59 for testing PCMPxSTRx, and ymm6, as this is needed for testing 60 MOVMASK variants. 
*/ 61 62 #define GEN_test_RandM(_name, _reg_form, _mem_form) \ 63 \ 64 __attribute__ ((noinline)) static void test_##_name ( void ) \ 65 { \ 66 Block* b = memalign32(sizeof(Block)); \ 67 randBlock(b); \ 68 printf("%s(reg)\n", #_name); \ 69 showBlock("before", b); \ 70 __asm__ __volatile__( \ 71 "vmovdqa 0(%0),%%ymm7" "\n\t" \ 72 "vmovdqa 32(%0),%%ymm8" "\n\t" \ 73 "vmovdqa 64(%0),%%ymm6" "\n\t" \ 74 "vmovdqa 96(%0),%%ymm9" "\n\t" \ 75 "movq 128(%0),%%r14" "\n\t" \ 76 _reg_form "\n\t" \ 77 "vmovdqa %%ymm7, 0(%0)" "\n\t" \ 78 "vmovdqa %%ymm8, 32(%0)" "\n\t" \ 79 "vmovdqa %%ymm6, 64(%0)" "\n\t" \ 80 "vmovdqa %%ymm9, 96(%0)" "\n\t" \ 81 "movq %%r14, 128(%0)" "\n\t" \ 82 : /*OUT*/ \ 83 : /*IN*/"r"(b) \ 84 : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \ 85 ); \ 86 showBlock("after", b); \ 87 randBlock(b); \ 88 printf("%s(mem)\n", #_name); \ 89 showBlock("before", b); \ 90 __asm__ __volatile__( \ 91 "leaq 0(%0),%%rax" "\n\t" \ 92 "vmovdqa 32(%0),%%ymm8" "\n\t" \ 93 "vmovdqa 64(%0),%%ymm7" "\n\t" \ 94 "vmovdqa 96(%0),%%ymm9" "\n\t" \ 95 "movq 128(%0),%%r14" "\n\t" \ 96 _mem_form "\n\t" \ 97 "vmovdqa %%ymm8, 32(%0)" "\n\t" \ 98 "vmovdqa %%ymm7, 64(%0)" "\n\t" \ 99 "vmovdqa %%ymm9, 96(%0)" "\n\t" \ 100 "movq %%r14, 128(%0)" "\n\t" \ 101 : /*OUT*/ \ 102 : /*IN*/"r"(b) \ 103 : /*TRASH*/"xmm6", \ 104 "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \ 105 ); \ 106 showBlock("after", b); \ 107 printf("\n"); \ 108 free(b); \ 109 } 110 111 #define GEN_test_Ronly(_name, _reg_form) \ 112 GEN_test_RandM(_name, _reg_form, "") 113 #define GEN_test_Monly(_name, _mem_form) \ 114 GEN_test_RandM(_name, "", _mem_form) 115 116 117 GEN_test_RandM(VPOR_128, 118 "vpor %%xmm6, %%xmm8, %%xmm7", 119 "vpor (%%rax), %%xmm8, %%xmm7") 120 121 GEN_test_RandM(VPXOR_128, 122 "vpxor %%xmm6, %%xmm8, %%xmm7", 123 "vpxor (%%rax), %%xmm8, %%xmm7") 124 125 GEN_test_RandM(VPSUBB_128, 126 "vpsubb %%xmm6, %%xmm8, %%xmm7", 127 "vpsubb (%%rax), %%xmm8, %%xmm7") 128 129 
/* Integer add/sub, zero-extension, blend, min/max, logic, int<->fp
   conversions, shuffles, and the VCMPSD predicate series — all
   128-bit forms, generated via GEN_test_RandM above. */

GEN_test_RandM(VPSUBD_128,
               "vpsubd %%xmm6, %%xmm8, %%xmm7",
               "vpsubd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDD_128,
               "vpaddd %%xmm6, %%xmm8, %%xmm7",
               "vpaddd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMOVZXWD_128,
               "vpmovzxwd %%xmm6, %%xmm8",
               "vpmovzxwd (%%rax), %%xmm8")

GEN_test_RandM(VPMOVZXBW_128,
               "vpmovzxbw %%xmm6, %%xmm8",
               "vpmovzxbw (%%rax), %%xmm8")

GEN_test_RandM(VPBLENDVB_128,
               "vpblendvb %%xmm9, %%xmm6, %%xmm8, %%xmm7",
               "vpblendvb %%xmm9, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMINSD_128,
               "vpminsd %%xmm6, %%xmm8, %%xmm7",
               "vpminsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMAXSD_128,
               "vpmaxsd %%xmm6, %%xmm8, %%xmm7",
               "vpmaxsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDPD_128,
               "vandpd %%xmm6, %%xmm8, %%xmm7",
               "vandpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTSI2SD_32,
               "vcvtsi2sdl %%r14d, %%xmm8, %%xmm7",
               "vcvtsi2sdl (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTSI2SD_64,
               "vcvtsi2sdq %%r14, %%xmm8, %%xmm7",
               "vcvtsi2sdq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTSI2SS_64,
               "vcvtsi2ssq %%r14, %%xmm8, %%xmm7",
               "vcvtsi2ssq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTTSD2SI_32,
               "vcvttsd2si %%xmm8, %%r14d",
               "vcvttsd2si (%%rax), %%r14d")

GEN_test_RandM(VCVTTSD2SI_64,
               "vcvttsd2si %%xmm8, %%r14",
               "vcvttsd2si (%%rax), %%r14")

GEN_test_RandM(VCVTSD2SI_32,
               "vcvtsd2si %%xmm8, %%r14d",
               "vcvtsd2si (%%rax), %%r14d")

GEN_test_RandM(VCVTSD2SI_64,
               "vcvtsd2si %%xmm8, %%r14",
               "vcvtsd2si (%%rax), %%r14")

GEN_test_RandM(VPSHUFB_128,
               "vpshufb %%xmm6, %%xmm8, %%xmm7",
               "vpshufb (%%rax), %%xmm8, %%xmm7")

/* VCMPSD with each comparison-predicate immediate.  Imm values
   0x9, 0xB, 0xF and 0x1B, 0x1F are not covered here. */
GEN_test_RandM(VCMPSD_128_0x0,
               "vcmpsd $0, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x1,
               "vcmpsd $1, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $1, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x2,
               "vcmpsd $2, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $2, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x3,
               "vcmpsd $3, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x4,
               "vcmpsd $4, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $4, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x5,
               "vcmpsd $5, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $5, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x6,
               "vcmpsd $6, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $6, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x7,
               "vcmpsd $7, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $7, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x8,
               "vcmpsd $8, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $8, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0xA,
               "vcmpsd $0xA, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0xA, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0xC,
               "vcmpsd $0xC, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0xC, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0xD,
               "vcmpsd $0xD, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0xD, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0xE,
               "vcmpsd $0xE, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0xE, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x10,
               "vcmpsd $0x10, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x10, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x11,
               "vcmpsd $0x11, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x11, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x12,
               "vcmpsd $0x12, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x12, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x13,
               "vcmpsd $0x13, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x13, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x14,
               "vcmpsd $0x14, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x14, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x15,
               "vcmpsd $0x15, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x15, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x16,
               "vcmpsd $0x16, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x16, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x17,
               "vcmpsd $0x17, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x17, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x18,
               "vcmpsd $0x18, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x18, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x19,
               "vcmpsd $0x19, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x19, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x1A,
               "vcmpsd $0x1A, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x1A, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x1C,
               "vcmpsd $0x1C, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x1C, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x1D,
               "vcmpsd $0x1D, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x1D, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSD_128_0x1E,
               "vcmpsd $0x1E, %%xmm6, %%xmm8, %%xmm7",
               "vcmpsd $0x1E, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSQRTSD_128,
               "vsqrtsd %%xmm6, %%xmm8, %%xmm7",
               "vsqrtsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VORPS_128,
               "vorps %%xmm6, %%xmm8, %%xmm7",
               "vorps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDNPS_128,
               "vandnps %%xmm6, %%xmm8, %%xmm7",
               "vandnps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMAXSS_128,
               "vmaxss %%xmm6, %%xmm8, %%xmm7",
               "vmaxss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMINSS_128,
               "vminss %%xmm6, %%xmm8, %%xmm7",
               "vminss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDPS_128,
               "vandps %%xmm6, %%xmm8, %%xmm7",
               "vandps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTSI2SS_128,
               "vcvtsi2ssl %%r14d, %%xmm8, %%xmm7",
               "vcvtsi2ssl (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VUNPCKLPS_128,
               "vunpcklps %%xmm6, %%xmm8, %%xmm7",
               "vunpcklps (%%rax), %%xmm8, %%xmm7")

/* Scalar SS arithmetic, byte unpacks, moves, PCMPESTRM, packed
   arithmetic and min/max (128- and 256-bit), and shuffles. */

GEN_test_RandM(VDIVSS_128,
               "vdivss %%xmm6, %%xmm8, %%xmm7",
               "vdivss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDSS_128,
               "vaddss %%xmm6, %%xmm8, %%xmm7",
               "vaddss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSUBSS_128,
               "vsubss %%xmm6, %%xmm8, %%xmm7",
               "vsubss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMULSS_128,
               "vmulss %%xmm6, %%xmm8, %%xmm7",
               "vmulss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKLBW_128,
               "vpunpcklbw %%xmm6, %%xmm8, %%xmm7",
               "vpunpcklbw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKHBW_128,
               "vpunpckhbw %%xmm6, %%xmm8, %%xmm7",
               "vpunpckhbw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTTSS2SI_32,
               "vcvttss2si %%xmm8, %%r14d",
               "vcvttss2si (%%rax), %%r14d")

GEN_test_RandM(VCVTSS2SI_32,
               "vcvtss2si %%xmm8, %%r14d",
               "vcvtss2si (%%rax), %%r14d")

GEN_test_RandM(VMOVQ_XMMorMEM64_to_XMM,
               "vmovq %%xmm7, %%xmm8",
               "vmovq (%%rax), %%xmm8")

/* NB tests the reg form only */
GEN_test_Ronly(VMOVQ_XMM_to_IREG64,
               "vmovq %%xmm7, %%r14")

/* This insn only exists in the reg-reg-reg form. */
GEN_test_Ronly(VMOVHLPS_128,
               "vmovhlps %%xmm6, %%xmm8, %%xmm7")

GEN_test_RandM(VPABSD_128,
               "vpabsd %%xmm6, %%xmm8",
               "vpabsd (%%rax), %%xmm8")

/* This insn only exists in the reg-reg-reg form. */
GEN_test_Ronly(VMOVLHPS_128,
               "vmovlhps %%xmm6, %%xmm8, %%xmm7")

GEN_test_Monly(VMOVNTDQ_128,
               "vmovntdq %%xmm8, (%%rax)")

GEN_test_Monly(VMOVNTDQ_256,
               "vmovntdq %%ymm8, (%%rax)")

GEN_test_RandM(VMOVUPS_XMM_to_XMMorMEM,
               "vmovups %%xmm8, %%xmm7",
               "vmovups %%xmm9, (%%rax)")

GEN_test_RandM(VMOVQ_IREGorMEM64_to_XMM,
               "vmovq %%r14, %%xmm7",
               "vmovq (%%rax), %%xmm9")

/* PCMPESTRM writes xmm0; copy it to xmm9 so the result lands in a
   block-visible register (xmm0 is in the clobber list). */
GEN_test_RandM(VPCMPESTRM_0x45_128,
               "vpcmpestrm $0x45, %%xmm7, %%xmm8; movapd %%xmm0, %%xmm9",
               "vpcmpestrm $0x45, (%%rax), %%xmm8; movapd %%xmm0, %%xmm9")

/* NB tests the reg form only */
GEN_test_Ronly(VMOVD_XMM_to_IREG32,
               "vmovd %%xmm7, %%r14d")

GEN_test_RandM(VCVTSD2SS_128,
               "vcvtsd2ss %%xmm9, %%xmm8, %%xmm7",
               "vcvtsd2ss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTSS2SD_128,
               "vcvtss2sd %%xmm9, %%xmm8, %%xmm7",
               "vcvtss2sd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPACKUSWB_128,
               "vpackuswb %%xmm9, %%xmm8, %%xmm7",
               "vpackuswb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTTSS2SI_64,
               "vcvttss2si %%xmm8, %%r14",
               "vcvttss2si (%%rax), %%r14")

GEN_test_RandM(VCVTSS2SI_64,
               "vcvtss2si %%xmm8, %%r14",
               "vcvtss2si (%%rax), %%r14")

GEN_test_Ronly(VPMOVMSKB_128,
               "vpmovmskb %%xmm8, %%r14")

GEN_test_RandM(VPAND_128,
               "vpand %%xmm9, %%xmm8, %%xmm7",
               "vpand (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VMOVHPD_128_StoreForm,
               "vmovhpd %%xmm8, (%%rax)")

GEN_test_Monly(VMOVHPS_128_StoreForm,
               "vmovhps %%xmm8, (%%rax)")

GEN_test_RandM(VPCMPEQB_128,
               "vpcmpeqb %%xmm9, %%xmm8, %%xmm7",
               "vpcmpeqb (%%rax), %%xmm8, %%xmm7")

/* Reg and mem forms deliberately use different imm8 selectors. */
GEN_test_RandM(VSHUFPS_0x39_128,
               "vshufps $0x39, %%xmm9, %%xmm8, %%xmm7",
               "vshufps $0xC6, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMULPS_128,
               "vmulps %%xmm9, %%xmm8, %%xmm7",
               "vmulps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSUBPS_128,
               "vsubps %%xmm9, %%xmm8, %%xmm7",
               "vsubps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDPS_128,
               "vaddps %%xmm9, %%xmm8, %%xmm7",
               "vaddps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMAXPS_128,
               "vmaxps %%xmm9, %%xmm8, %%xmm7",
               "vmaxps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMAXPS_256,
               "vmaxps %%ymm9, %%ymm8, %%ymm7",
               "vmaxps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMAXPD_128,
               "vmaxpd %%xmm9, %%xmm8, %%xmm7",
               "vmaxpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMAXPD_256,
               "vmaxpd %%ymm9, %%ymm8, %%ymm7",
               "vmaxpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMINPS_128,
               "vminps %%xmm9, %%xmm8, %%xmm7",
               "vminps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMINPS_256,
               "vminps %%ymm9, %%ymm8, %%ymm7",
               "vminps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMINPD_128,
               "vminpd %%xmm9, %%xmm8, %%xmm7",
               "vminpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMINPD_256,
               "vminpd %%ymm9, %%ymm8, %%ymm7",
               "vminpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VCVTPS2DQ_128,
               "vcvtps2dq %%xmm8, %%xmm7",
               "vcvtps2dq (%%rax), %%xmm8")

GEN_test_RandM(VPSHUFLW_0x39_128,
               "vpshuflw $0x39, %%xmm9, %%xmm7",
               "vpshuflw $0xC6, (%%rax), %%xmm8")

GEN_test_RandM(VPSHUFHW_0x39_128,
               "vpshufhw $0x39, %%xmm9, %%xmm7",
               "vpshufhw $0xC6, (%%rax), %%xmm8")

GEN_test_RandM(VPMULLW_128,
               "vpmullw %%xmm9, %%xmm8, %%xmm7",
               "vpmullw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDUSW_128,
               "vpaddusw %%xmm9, %%xmm8, %%xmm7",
               "vpaddusw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMULHUW_128,
               "vpmulhuw %%xmm9, %%xmm8, %%xmm7",
               "vpmulhuw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDUSB_128,
               "vpaddusb %%xmm9, %%xmm8, %%xmm7",
               "vpaddusb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKLWD_128,
               "vpunpcklwd %%xmm6, %%xmm8, %%xmm7",
               "vpunpcklwd (%%rax), %%xmm8, %%xmm7")

/* Word unpacks, immediate shifts, saturating sub, insert/extract,
   the full family of mov (aligned/unaligned, G->E and E->G), and the
   first half of the VCMPSS predicate series. */

GEN_test_RandM(VPUNPCKHWD_128,
               "vpunpckhwd %%xmm6, %%xmm8, %%xmm7",
               "vpunpckhwd (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPSLLD_0x05_128,
               "vpslld $0x5, %%xmm9, %%xmm7")

GEN_test_Ronly(VPSRLD_0x05_128,
               "vpsrld $0x5, %%xmm9, %%xmm7")

GEN_test_Ronly(VPSRAD_0x05_128,
               "vpsrad $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPSUBUSB_128,
               "vpsubusb %%xmm9, %%xmm8, %%xmm7",
               "vpsubusb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSUBSB_128,
               "vpsubsb %%xmm9, %%xmm8, %%xmm7",
               "vpsubsb (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPSRLDQ_0x05_128,
               "vpsrldq $0x5, %%xmm9, %%xmm7")

GEN_test_Ronly(VPSLLDQ_0x05_128,
               "vpslldq $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPANDN_128,
               "vpandn %%xmm9, %%xmm8, %%xmm7",
               "vpandn (%%rax), %%xmm8, %%xmm7")

/* NB tests the mem form only */
GEN_test_Monly(VMOVD_XMM_to_MEM32,
               "vmovd %%xmm7, (%%rax)")

GEN_test_RandM(VPINSRD_128,
               "vpinsrd $0, %%r14d, %%xmm8, %%xmm7",
               "vpinsrd $3, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKLQDQ_128,
               "vpunpcklqdq %%xmm6, %%xmm8, %%xmm7",
               "vpunpcklqdq (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPSRLW_0x05_128,
               "vpsrlw $0x5, %%xmm9, %%xmm7")

GEN_test_Ronly(VPSLLW_0x05_128,
               "vpsllw $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPADDW_128,
               "vpaddw %%xmm6, %%xmm8, %%xmm7",
               "vpaddw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPACKSSDW_128,
               "vpackssdw %%xmm9, %%xmm8, %%xmm7",
               "vpackssdw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKLDQ_128,
               "vpunpckldq %%xmm6, %%xmm8, %%xmm7",
               "vpunpckldq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VINSERTPS_0x39_128,
               "vinsertps $0x39, %%xmm6, %%xmm8, %%xmm7",
               "vinsertps $0xC6, (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VMOVSD_M64_XMM, "vmovsd (%%rax), %%xmm8")

/* NOTE(review): the name says M64 but vmovss moves 32 bits — the
   name is kept as-is for output compatibility; confirm upstream. */
GEN_test_Monly(VMOVSS_M64_XMM, "vmovss (%%rax), %%xmm8")

GEN_test_Monly(VMOVSD_XMM_M64, "vmovsd %%xmm8, (%%rax)")

GEN_test_Monly(VMOVSS_XMM_M32, "vmovss %%xmm8, (%%rax)")

GEN_test_RandM(VMOVUPD_GtoE_128,
               "vmovupd %%xmm9, %%xmm6",
               "vmovupd %%xmm7, (%%rax)")

GEN_test_RandM(VMOVAPD_EtoG_128,
               "vmovapd %%xmm6, %%xmm8",
               "vmovapd (%%rax), %%xmm9")

GEN_test_RandM(VMOVAPD_EtoG_256,
               "vmovapd %%ymm6, %%ymm8",
               "vmovapd (%%rax), %%ymm9")

GEN_test_RandM(VMOVAPS_EtoG_128,
               "vmovaps %%xmm6, %%xmm8",
               "vmovaps (%%rax), %%xmm9")

GEN_test_RandM(VMOVAPS_GtoE_128,
               "vmovaps %%xmm9, %%xmm6",
               "vmovaps %%xmm7, (%%rax)")

GEN_test_RandM(VMOVAPS_GtoE_256,
               "vmovaps %%ymm9, %%ymm6",
               "vmovaps %%ymm7, (%%rax)")

GEN_test_RandM(VMOVAPD_GtoE_128,
               "vmovapd %%xmm9, %%xmm6",
               "vmovapd %%xmm7, (%%rax)")

GEN_test_RandM(VMOVAPD_GtoE_256,
               "vmovapd %%ymm9, %%ymm6",
               "vmovapd %%ymm7, (%%rax)")

GEN_test_RandM(VMOVDQU_EtoG_128,
               "vmovdqu %%xmm6, %%xmm8",
               "vmovdqu (%%rax), %%xmm9")

GEN_test_RandM(VMOVDQA_EtoG_128,
               "vmovdqa %%xmm6, %%xmm8",
               "vmovdqa (%%rax), %%xmm9")

GEN_test_RandM(VMOVDQA_EtoG_256,
               "vmovdqa %%ymm6, %%ymm8",
               "vmovdqa (%%rax), %%ymm9")

GEN_test_RandM(VMOVDQU_GtoE_128,
               "vmovdqu %%xmm9, %%xmm6",
               "vmovdqu %%xmm7, (%%rax)")

GEN_test_RandM(VMOVDQA_GtoE_128,
               "vmovdqa %%xmm9, %%xmm6",
               "vmovdqa %%xmm7, (%%rax)")

GEN_test_RandM(VMOVDQA_GtoE_256,
               "vmovdqa %%ymm9, %%ymm6",
               "vmovdqa %%ymm7, (%%rax)")

GEN_test_Monly(VMOVQ_XMM_MEM64, "vmovq %%xmm8, (%%rax)")

GEN_test_RandM(VMOVD_IREGorMEM32_to_XMM,
               "vmovd %%r14d, %%xmm7",
               "vmovd (%%rax), %%xmm9")

GEN_test_RandM(VMOVDDUP_XMMorMEM64_to_XMM,
               "vmovddup %%xmm8, %%xmm7",
               "vmovddup (%%rax), %%xmm9")

/* VCMPSS predicate series, imm 0x0..0x14 (0x9, 0xB, 0xF skipped). */
GEN_test_RandM(VCMPSS_128_0x0,
               "vcmpss $0, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x1,
               "vcmpss $1, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $1, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x2,
               "vcmpss $2, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $2, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x3,
               "vcmpss $3, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x4,
               "vcmpss $4, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $4, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x5,
               "vcmpss $5, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $5, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x6,
               "vcmpss $6, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $6, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x7,
               "vcmpss $7, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $7, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x8,
               "vcmpss $8, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $8, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0xA,
               "vcmpss $0xA, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0xA, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0xC,
               "vcmpss $0xC, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0xC, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0xD,
               "vcmpss $0xD, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0xD, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0xE,
               "vcmpss $0xE, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0xE, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x10,
               "vcmpss $0x10, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x10, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x11,
               "vcmpss $0x11, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x11, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x12,
               "vcmpss $0x12, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x12, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x13,
               "vcmpss $0x13, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x13, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x14,
               "vcmpss $0x14, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x14, (%%rax), %%xmm8, %%xmm7")

/* Second half of the VCMPSS predicate series, PD/PS conversions,
   insert/extract of 128-bit lanes, scalar SD arithmetic, logic ops,
   flag-producing compares, and byte/word/dword/qword extracts. */

GEN_test_RandM(VCMPSS_128_0x15,
               "vcmpss $0x15, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x15, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x16,
               "vcmpss $0x16, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x16, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x17,
               "vcmpss $0x17, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x17, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x18,
               "vcmpss $0x18, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x18, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x19,
               "vcmpss $0x19, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x19, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x1A,
               "vcmpss $0x1A, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x1A, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x1C,
               "vcmpss $0x1C, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x1C, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x1D,
               "vcmpss $0x1D, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x1D, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VCMPSS_128_0x1E,
               "vcmpss $0x1E, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x1E, (%%rax), %%xmm8, %%xmm7")

// The x suffix denotes a 128 -> 64 operation
GEN_test_RandM(VCVTPD2PS_128,
               "vcvtpd2psx %%xmm8, %%xmm7",
               "vcvtpd2psx (%%rax), %%xmm9")

GEN_test_RandM(VEXTRACTF128_0x0,
               "vextractf128 $0x0, %%ymm7, %%xmm9",
               "vextractf128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTF128_0x1,
               "vextractf128 $0x1, %%ymm7, %%xmm9",
               "vextractf128 $0x1, %%ymm7, (%%rax)")

GEN_test_RandM(VINSERTF128_0x0,
               "vinsertf128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinsertf128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTF128_0x1,
               "vinsertf128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinsertf128 $0x1, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VPEXTRD_128_0x0,
               "vpextrd $0x0, %%xmm7, %%r14d",
               "vpextrd $0x0, %%xmm7, (%%rax)")

GEN_test_RandM(VPEXTRD_128_0x3,
               "vpextrd $0x3, %%xmm7, %%r14d",
               "vpextrd $0x3, %%xmm7, (%%rax)")

GEN_test_RandM(VPCMPEQD_128,
               "vpcmpeqd %%xmm6, %%xmm8, %%xmm7",
               "vpcmpeqd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSHUFD_0x39_128,
               "vpshufd $0x39, %%xmm9, %%xmm8",
               "vpshufd $0xC6, (%%rax), %%xmm7")

GEN_test_RandM(VMAXSD_128,
               "vmaxsd %%xmm6, %%xmm8, %%xmm7",
               "vmaxsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VDIVSD_128,
               "vdivsd %%xmm6, %%xmm8, %%xmm7",
               "vdivsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMINSD_128,
               "vminsd %%xmm6, %%xmm8, %%xmm7",
               "vminsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSUBSD_128,
               "vsubsd %%xmm6, %%xmm8, %%xmm7",
               "vsubsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDSD_128,
               "vaddsd %%xmm6, %%xmm8, %%xmm7",
               "vaddsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMULSD_128,
               "vmulsd %%xmm6, %%xmm8, %%xmm7",
               "vmulsd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VXORPS_128,
               "vxorps %%xmm6, %%xmm8, %%xmm7",
               "vxorps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VXORPD_128,
               "vxorpd %%xmm6, %%xmm8, %%xmm7",
               "vxorpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VORPD_128,
               "vorpd %%xmm6, %%xmm8, %%xmm7",
               "vorpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDNPD_128,
               "vandnpd %%xmm6, %%xmm8, %%xmm7",
               "vandnpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCVTPS2PD_128,
               "vcvtps2pd %%xmm6, %%xmm8",
               "vcvtps2pd (%%rax), %%xmm8")

/* VUCOMISx write only EFLAGS; capture the relevant flag bits
   (mask 0x8D5) into r14 so they appear in the block dump. */
GEN_test_RandM(VUCOMISD_128,
               "vucomisd %%xmm6, %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vucomisd (%%rax), %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VUCOMISS_128,
               "vucomiss %%xmm6, %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vucomiss (%%rax), %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VPINSRQ_128,
               "vpinsrq $0, %%r14, %%xmm8, %%xmm7",
               "vpinsrq $1, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDQ_128,
               "vpaddq %%xmm6, %%xmm8, %%xmm7",
               "vpaddq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSUBQ_128,
               "vpsubq %%xmm6, %%xmm8, %%xmm7",
               "vpsubq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSUBW_128,
               "vpsubw %%xmm6, %%xmm8, %%xmm7",
               "vpsubw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMOVUPD_GtoE_256,
               "vmovupd %%ymm9, %%ymm6",
               "vmovupd %%ymm7, (%%rax)")

GEN_test_RandM(VMOVUPD_EtoG_256,
               "vmovupd %%ymm6, %%ymm9",
               "vmovupd (%%rax), %%ymm7")

GEN_test_RandM(VMULPD_256,
               "vmulpd %%ymm6, %%ymm8, %%ymm7",
               "vmulpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMOVUPD_EtoG_128,
               "vmovupd %%xmm6, %%xmm9",
               "vmovupd (%%rax), %%xmm7")

GEN_test_RandM(VADDPD_256,
               "vaddpd %%ymm6, %%ymm8, %%ymm7",
               "vaddpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VSUBPD_256,
               "vsubpd %%ymm6, %%ymm8, %%ymm7",
               "vsubpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VDIVPD_256,
               "vdivpd %%ymm6, %%ymm8, %%ymm7",
               "vdivpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_128,
               "vpcmpeqq %%xmm6, %%xmm8, %%xmm7",
               "vpcmpeqq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSUBPD_128,
               "vsubpd %%xmm6, %%xmm8, %%xmm7",
               "vsubpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDPD_128,
               "vaddpd %%xmm6, %%xmm8, %%xmm7",
               "vaddpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VUNPCKLPD_128,
               "vunpcklpd %%xmm6, %%xmm8, %%xmm7",
               "vunpcklpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VUNPCKHPD_128,
               "vunpckhpd %%xmm6, %%xmm8, %%xmm7",
               "vunpckhpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VUNPCKHPS_128,
               "vunpckhps %%xmm6, %%xmm8, %%xmm7",
               "vunpckhps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMOVUPS_EtoG_128,
               "vmovups %%xmm6, %%xmm8",
               "vmovups (%%rax), %%xmm9")

GEN_test_RandM(VADDPS_256,
               "vaddps %%ymm6, %%ymm8, %%ymm7",
               "vaddps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VSUBPS_256,
               "vsubps %%ymm6, %%ymm8, %%ymm7",
               "vsubps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMULPS_256,
               "vmulps %%ymm6, %%ymm8, %%ymm7",
               "vmulps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VDIVPS_256,
               "vdivps %%ymm6, %%ymm8, %%ymm7",
               "vdivps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_128,
               "vpcmpgtq %%xmm6, %%xmm8, %%xmm7",
               "vpcmpgtq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPEXTRQ_128_0x0,
               "vpextrq $0x0, %%xmm7, %%r14",
               "vpextrq $0x0, %%xmm7, (%%rax)")

GEN_test_RandM(VPEXTRQ_128_0x1,
               "vpextrq $0x1, %%xmm7, %%r14",
               "vpextrq $0x1, %%xmm7, (%%rax)")

GEN_test_Ronly(VPSRLQ_0x05_128,
               "vpsrlq $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPMULUDQ_128,
               "vpmuludq %%xmm6, %%xmm8, %%xmm7",
               "vpmuludq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMULDQ_128,
               "vpmuldq %%xmm6, %%xmm8, %%xmm7",
               "vpmuldq (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPSLLQ_0x05_128,
               "vpsllq $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPMAXUD_128,
               "vpmaxud %%xmm6, %%xmm8, %%xmm7",
               "vpmaxud (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMINUD_128,
               "vpminud %%xmm6, %%xmm8, %%xmm7",
               "vpminud (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMULLD_128,
               "vpmulld %%xmm6, %%xmm8, %%xmm7",
               "vpmulld (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMAXUW_128,
               "vpmaxuw %%xmm6, %%xmm8, %%xmm7",
               "vpmaxuw (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPEXTRW_128_EregOnly_toG_0x0,
               "vpextrw $0x0, %%xmm7, %%r14d")

GEN_test_Ronly(VPEXTRW_128_EregOnly_toG_0x7,
               "vpextrw $0x7, %%xmm7, %%r14d")

GEN_test_RandM(VPMINUW_128,
               "vpminuw %%xmm6, %%xmm8, %%xmm7",
               "vpminuw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHMINPOSUW_128,
               "vphminposuw %%xmm6, %%xmm8",
               "vphminposuw (%%rax), %%xmm7")

GEN_test_RandM(VPMAXSW_128,
               "vpmaxsw %%xmm6, %%xmm8, %%xmm7",
               "vpmaxsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMINSW_128,
               "vpminsw %%xmm6, %%xmm8, %%xmm7",
               "vpminsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMAXUB_128,
               "vpmaxub %%xmm6, %%xmm8, %%xmm7",
               "vpmaxub (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPEXTRB_GtoE_128_0x0,
               "vpextrb $0x0, %%xmm8, %%r14",
               "vpextrb $0x0, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0x1,
               "vpextrb $0x1, %%xmm8, %%r14",
               "vpextrb $0x1, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0x2,
               "vpextrb $0x2, %%xmm8, %%r14",
               "vpextrb $0x2, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0x3,
               "vpextrb $0x3, %%xmm8, %%r14",
               "vpextrb $0x3, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0x4,
               "vpextrb $0x4, %%xmm8, %%r14",
               "vpextrb $0x4, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0x9,
               "vpextrb $0x9, %%xmm8, %%r14",
               "vpextrb $0x9, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0xE,
               "vpextrb $0xE, %%xmm8, %%r14",
               "vpextrb $0xE, %%xmm8, (%%rax)")

GEN_test_RandM(VPEXTRB_GtoE_128_0xF,
               "vpextrb $0xF, %%xmm8, %%r14",
               "vpextrb $0xF, %%xmm8, (%%rax)")

GEN_test_RandM(VPMINUB_128,
               "vpminub %%xmm6, %%xmm8, %%xmm7",
               "vpminub (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMAXSB_128,
               "vpmaxsb %%xmm6, %%xmm8, %%xmm7",
               "vpmaxsb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMINSB_128,
               "vpminsb %%xmm6, %%xmm8, %%xmm7",
               "vpminsb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPERM2F128_0x00,
               "vperm2f128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x00, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0xFF,
               "vperm2f128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0x30,
               "vperm2f128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x30, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0x21,
               "vperm2f128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x21, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2F128_0x12,
               "vperm2f128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0x03,
               "vperm2f128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0x85,
               "vperm2f128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x85, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2F128_0x5A,
               "vperm2f128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2f128 $0x5A, (%%rax), %%ymm8, %%ymm7")

/* VPERMILPD (imm8 form).  Note the reg and mem forms deliberately use
   different imm8 values, so each invocation covers two selectors. */
GEN_test_RandM(VPERMILPD_256_0x0,
               "vpermilpd $0x0, %%ymm6, %%ymm8",
               "vpermilpd $0x1, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPD_256_0xF,
               "vpermilpd $0xF, %%ymm6, %%ymm8",
               "vpermilpd $0xE, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPD_256_0xA,
               "vpermilpd $0xA, %%ymm6, %%ymm8",
               "vpermilpd $0xB, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPD_256_0x5,
               "vpermilpd $0x5, %%ymm6, %%ymm8",
               "vpermilpd $0x4, (%%rax), %%ymm8")

GEN_test_RandM(VPERMILPD_128_0x0,
               "vpermilpd $0x0, %%xmm6, %%xmm8",
               "vpermilpd $0x1, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPD_128_0x3,
               "vpermilpd $0x3, %%xmm6, %%xmm8",
               "vpermilpd $0x2, (%%rax), %%xmm8")

GEN_test_RandM(VUNPCKLPD_256,
               "vunpcklpd %%ymm6, %%ymm8, %%ymm7",
               "vunpcklpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VUNPCKHPD_256,
               "vunpckhpd %%ymm6, %%ymm8, %%ymm7",
               "vunpckhpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VSHUFPS_0x39_256,
               "vshufps $0x39, %%ymm9, %%ymm8, %%ymm7",
               "vshufps $0xC6, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VUNPCKLPS_256,
               "vunpcklps %%ymm6, %%ymm8, %%ymm7",
               "vunpcklps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VUNPCKHPS_256,
               "vunpckhps %%ymm6, %%ymm8, %%ymm7",
               "vunpckhps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VXORPD_256,
               "vxorpd %%ymm6, %%ymm8, %%ymm7",
               "vxorpd (%%rax), %%ymm8, %%ymm7")

GEN_test_Monly(VBROADCASTSD_256,
               "vbroadcastsd (%%rax), %%ymm8")

/* VCMPPD/VCMPPS with comparison predicate imm8 = 4. */
GEN_test_RandM(VCMPPD_128_0x4,
               "vcmppd $4, %%xmm6, %%xmm8, %%xmm7",
               "vcmppd $4, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCMPPD_256_0x4,
               "vcmppd $4, %%ymm6, %%ymm8, %%ymm7",
               "vcmppd $4, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VCMPPS_128_0x4,
               "vcmpps $4, %%xmm6, %%xmm8, %%xmm7",
               "vcmpps $4, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCMPPS_256_0x4,
               "vcmpps $4, %%ymm6, %%ymm8, %%ymm7",
               "vcmpps $4, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VCVTDQ2PD_128,
               "vcvtdq2pd %%xmm6, %%xmm8",
               "vcvtdq2pd (%%rax), %%xmm8")

GEN_test_RandM(VDIVPD_128,
               "vdivpd %%xmm6, %%xmm8, %%xmm7",
               "vdivpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDPD_256,
               "vandpd %%ymm6, %%ymm8, %%ymm7",
               "vandpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_128,
               "vpmovsxbw %%xmm6, %%xmm8",
               "vpmovsxbw (%%rax), %%xmm8")

GEN_test_RandM(VPSUBUSW_128,
               "vpsubusw %%xmm9, %%xmm8, %%xmm7",
               "vpsubusw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSUBSW_128,
               "vpsubsw %%xmm9, %%xmm8, %%xmm7",
               "vpsubsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPCMPEQW_128,
               "vpcmpeqw %%xmm6, %%xmm8, %%xmm7",
               "vpcmpeqw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDB_128,
               "vpaddb %%xmm6, %%xmm8, %%xmm7",
               "vpaddb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMOVAPS_EtoG_256,
               "vmovaps %%ymm6, %%ymm8",
               "vmovaps (%%rax), %%ymm9")

GEN_test_RandM(VCVTDQ2PD_256,
               "vcvtdq2pd %%xmm6, %%ymm8",
               "vcvtdq2pd (%%rax), %%ymm8")

GEN_test_Monly(VMOVHPD_128_LoadForm,
               "vmovhpd (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VMOVHPS_128_LoadForm,
               "vmovhps (%%rax), %%xmm8, %%xmm7")

// The y suffix denotes a 256 -> 128 operation
GEN_test_RandM(VCVTPD2PS_256,
               "vcvtpd2psy %%ymm8, %%xmm7",
               "vcvtpd2psy (%%rax), %%xmm9")

GEN_test_RandM(VPUNPCKHDQ_128,
               "vpunpckhdq %%xmm6, %%xmm8, %%xmm7",
               "vpunpckhdq (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VBROADCASTSS_128,
               "vbroadcastss (%%rax), %%xmm8")

GEN_test_RandM(VPMOVSXDQ_128,
               "vpmovsxdq %%xmm6, %%xmm8",
               "vpmovsxdq (%%rax), %%xmm8")

GEN_test_RandM(VPMOVSXWD_128,
               "vpmovsxwd %%xmm6, %%xmm8",
               "vpmovsxwd (%%rax), %%xmm8")

GEN_test_RandM(VDIVPS_128,
               "vdivps %%xmm9, %%xmm8, %%xmm7",
               "vdivps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VANDPS_256,
               "vandps %%ymm6, %%ymm8, %%ymm7",
               "vandps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VXORPS_256,
               "vxorps %%ymm6, %%ymm8, %%ymm7",
               "vxorps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VORPS_256,
               "vorps %%ymm6, %%ymm8, %%ymm7",
               "vorps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VANDNPD_256,
               "vandnpd %%ymm6, %%ymm8, %%ymm7",
               "vandnpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VANDNPS_256,
               "vandnps %%ymm6, %%ymm8, %%ymm7",
               "vandnps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VORPD_256,
               "vorpd %%ymm6, %%ymm8, %%ymm7",
               "vorpd (%%rax), %%ymm8, %%ymm7")

/* VPERMILPS (imm8 form): reg and mem forms again use different imm8
   values to double the selector coverage per invocation. */
GEN_test_RandM(VPERMILPS_256_0x0F,
               "vpermilps $0x0F, %%ymm6, %%ymm8",
               "vpermilps $0x1E, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPS_256_0xFA,
               "vpermilps $0xFA, %%ymm6, %%ymm8",
               "vpermilps $0xE5, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPS_256_0xA3,
               "vpermilps $0xA3, %%ymm6, %%ymm8",
               "vpermilps $0xB4, (%%rax), %%ymm8")
GEN_test_RandM(VPERMILPS_256_0x5A,
               "vpermilps $0x5A, %%ymm6, %%ymm8",
               "vpermilps $0x45, (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_128,
               "vpmulhw %%xmm9, %%xmm8, %%xmm7",
               "vpmulhw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPUNPCKHQDQ_128,
               "vpunpckhqdq %%xmm6, %%xmm8, %%xmm7",
               "vpunpckhqdq (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VPSRAW_0x05_128,
               "vpsraw $0x5, %%xmm9, %%xmm7")

GEN_test_RandM(VPCMPGTB_128,
               "vpcmpgtb %%xmm6, %%xmm8, %%xmm7",
               "vpcmpgtb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPCMPGTW_128,
               "vpcmpgtw %%xmm6, %%xmm8, %%xmm7",
               "vpcmpgtw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPCMPGTD_128,
               "vpcmpgtd %%xmm6, %%xmm8, %%xmm7",
               "vpcmpgtd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMOVZXBD_128,
               "vpmovzxbd %%xmm6, %%xmm8",
               "vpmovzxbd (%%rax), %%xmm8")

GEN_test_RandM(VPMOVSXBD_128,
               "vpmovsxbd %%xmm6, %%xmm8",
               "vpmovsxbd (%%rax), %%xmm8")

/* VPINSRB/VPINSRW: insert from r14d (reg form) or memory (mem form)
   at a spread of element indices. */
GEN_test_RandM(VPINSRB_128_1of3,
               "vpinsrb $0, %%r14d, %%xmm8, %%xmm7",
               "vpinsrb $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPINSRB_128_2of3,
               "vpinsrb $6, %%r14d, %%xmm8, %%xmm7",
               "vpinsrb $9, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPINSRB_128_3of3,
               "vpinsrb $12, %%r14d, %%xmm8, %%xmm7",
               "vpinsrb $15, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPINSRW_128_1of4,
               "vpinsrw $0, %%r14d, %%xmm8, %%xmm7",
               "vpinsrw $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPINSRW_128_2of4,
               "vpinsrw $2, %%r14d, %%xmm8, %%xmm7",
               "vpinsrw $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPINSRW_128_3of4,
               "vpinsrw $4, %%r14d, %%xmm8, %%xmm7",
               "vpinsrw $5, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPINSRW_128_4of4,
               "vpinsrw $6, %%r14d, %%xmm8, %%xmm7",
               "vpinsrw $7, (%%rax), %%xmm8, %%xmm7")

/* Flag-producing compares: capture RFLAGS with pushfq/popq into r14
   and mask with 0x8D5 (the OF/SF/ZF/AF/PF/CF bits) so only the
   arithmetic flags take part in the before/after comparison. */
GEN_test_RandM(VCOMISD_128,
               "vcomisd %%xmm6, %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vcomisd (%%rax), %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VCOMISS_128,
               "vcomiss %%xmm6, %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vcomiss (%%rax), %%xmm8; pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VMOVUPS_YMM_to_YMMorMEM,
               "vmovups %%ymm8, %%ymm7",
               "vmovups %%ymm9, (%%rax)")

/* VDPPD/VDPPS dot products over a spread of imm8 masks; the mem forms
   also vary the source/destination registers. */
GEN_test_RandM(VDPPD_128_1of4,
               "vdppd $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vdppd $0xA5, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPD_128_2of4,
               "vdppd $0x5A, %%xmm6, %%xmm8, %%xmm7",
               "vdppd $0xFF, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPD_128_3of4,
               "vdppd $0x0F, %%xmm6, %%xmm8, %%xmm7",
               "vdppd $0x37, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPD_128_4of4,
               "vdppd $0xF0, %%xmm6, %%xmm8, %%xmm7",
               "vdppd $0x73, (%%rax), %%xmm9, %%xmm6")

GEN_test_RandM(VDPPS_128_1of4,
               "vdpps $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vdpps $0xA5, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPS_128_2of4,
               "vdpps $0x5A, %%xmm6, %%xmm8, %%xmm7",
               "vdpps $0xFF, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPS_128_3of4,
               "vdpps $0x0F, %%xmm6, %%xmm8, %%xmm7",
               "vdpps $0x37, (%%rax), %%xmm9, %%xmm6")
GEN_test_RandM(VDPPS_128_4of4,
               "vdpps $0xF0, %%xmm6, %%xmm8, %%xmm7",
               "vdpps $0x73, (%%rax), %%xmm9, %%xmm6")

GEN_test_RandM(VDPPS_256_1of4,
               "vdpps $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vdpps $0xA5, (%%rax), %%ymm9, %%ymm6")
GEN_test_RandM(VDPPS_256_2of4,
               "vdpps $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vdpps $0xFF, (%%rax), %%ymm9, %%ymm6")
GEN_test_RandM(VDPPS_256_3of4,
               "vdpps $0x0F, %%ymm6, %%ymm8, %%ymm7",
               "vdpps $0x37, (%%rax), %%ymm9, %%ymm6")
GEN_test_RandM(VDPPS_256_4of4,
               "vdpps $0xF0, %%ymm6, %%ymm8, %%ymm7",
               "vdpps $0x73, (%%rax), %%ymm9, %%ymm6")

GEN_test_Monly(VBROADCASTSS_256,
               "vbroadcastss (%%rax), %%ymm8")

GEN_test_RandM(VPALIGNR_128_1of3,
               "vpalignr $0, %%xmm6, %%xmm8, %%xmm7",
               "vpalignr $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPALIGNR_128_2of3,
               "vpalignr $6, %%xmm6, %%xmm8, %%xmm7",
               "vpalignr $9, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPALIGNR_128_3of3,
               "vpalignr $12, %%xmm6, %%xmm8, %%xmm7",
               "vpalignr $15, (%%rax), %%xmm8, %%xmm7")

GEN_test_Ronly(VMOVSD_REG_XMM, "vmovsd %%xmm9, %%xmm7, %%xmm8")

GEN_test_Ronly(VMOVSS_REG_XMM, "vmovss %%xmm9, %%xmm7, %%xmm8")

GEN_test_Monly(VMOVLPD_128_M64_XMM_XMM, "vmovlpd (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VMOVLPD_128_XMM_M64, "vmovlpd %%xmm7, (%%rax)")

GEN_test_RandM(VSHUFPD_128_1of2,
               "vshufpd $0, %%xmm9, %%xmm8, %%xmm7",
               "vshufpd $1, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VSHUFPD_128_2of2,
               "vshufpd $2, %%xmm9, %%xmm8, %%xmm7",
               "vshufpd $3, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSHUFPD_256_1of2,
               "vshufpd $0x00, %%ymm9, %%ymm8, %%ymm7",
               "vshufpd $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VSHUFPD_256_2of2,
               "vshufpd $0x5A, %%ymm9, %%ymm8, %%ymm7",
               "vshufpd $0xA5, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERMILPS_128_0x00,
               "vpermilps $0x00, %%xmm6, %%xmm8",
               "vpermilps $0x01, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0xFE,
               "vpermilps $0xFE, %%xmm6, %%xmm8",
               "vpermilps $0xFF, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0x30,
               "vpermilps $0x30, %%xmm6, %%xmm8",
               "vpermilps $0x03, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0x21,
               "vpermilps $0x21, %%xmm6, %%xmm8",
               "vpermilps $0x12, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0xD7,
               "vpermilps $0xD7, %%xmm6, %%xmm8",
               "vpermilps $0x6C, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0xB5,
               "vpermilps $0xB5, %%xmm6, %%xmm8",
               "vpermilps $0x4A, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0x85,
               "vpermilps $0x85, %%xmm6, %%xmm8",
               "vpermilps $0xDC, (%%rax), %%xmm8")
GEN_test_RandM(VPERMILPS_128_0x29,
               "vpermilps $0x29, %%xmm6, %%xmm8",
               "vpermilps $0x92, (%%rax), %%xmm8")

GEN_test_RandM(VBLENDPS_128_1of3,
               "vblendps $0, %%xmm6, %%xmm8, %%xmm7",
               "vblendps $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VBLENDPS_128_2of3,
               "vblendps $6, %%xmm6, %%xmm8, %%xmm7",
               "vblendps $9, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VBLENDPS_128_3of3,
               "vblendps $12, %%xmm6, %%xmm8, %%xmm7",
               "vblendps $15, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VBLENDPD_128_1of2,
               "vblendpd $0, %%xmm6, %%xmm8, %%xmm7",
               "vblendpd $1, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VBLENDPD_128_2of2,
               "vblendpd $2, %%xmm6, %%xmm8, %%xmm7",
               "vblendpd $3, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VBLENDPD_256_1of3,
               "vblendpd $0, %%ymm6, %%ymm8, %%ymm7",
               "vblendpd $3, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VBLENDPD_256_2of3,
               "vblendpd $6, %%ymm6, %%ymm8, %%ymm7",
               "vblendpd $9, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VBLENDPD_256_3of3,
               "vblendpd $12, %%ymm6, %%ymm8, %%ymm7",
               "vblendpd $15, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_128_0x00,
               "vpblendw $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x01, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0xFE,
               "vpblendw $0xFE, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0xFF, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0x30,
               "vpblendw $0x30, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x03, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0x21,
               "vpblendw $0x21, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x12, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0xD7,
               "vpblendw $0xD7, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x6C, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0xB5,
               "vpblendw $0xB5, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x4A, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0x85,
               "vpblendw $0x85, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0xDC, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDW_128_0x29,
               "vpblendw $0x29, %%xmm6, %%xmm8, %%xmm7",
               "vpblendw $0x92, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMOVUPS_EtoG_256,
               "vmovups %%ymm6, %%ymm9",
               "vmovups (%%rax), %%ymm7")

GEN_test_RandM(VSQRTSS_128,
               "vsqrtss %%xmm6, %%xmm8, %%xmm7",
               "vsqrtss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VSQRTPS_128,
               "vsqrtps %%xmm6, %%xmm8",
               "vsqrtps (%%rax), %%xmm8")

GEN_test_RandM(VSQRTPS_256,
               "vsqrtps %%ymm6, %%ymm8",
               "vsqrtps (%%rax), %%ymm8")

GEN_test_RandM(VSQRTPD_128,
               "vsqrtpd %%xmm6, %%xmm8",
               "vsqrtpd (%%rax), %%xmm8")

GEN_test_RandM(VSQRTPD_256,
               "vsqrtpd %%ymm6, %%ymm8",
               "vsqrtpd (%%rax), %%ymm8")

GEN_test_RandM(VRSQRTSS_128,
               "vrsqrtss %%xmm6, %%xmm8, %%xmm7",
               "vrsqrtss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VRSQRTPS_128,
               "vrsqrtps %%xmm6, %%xmm8",
               "vrsqrtps (%%rax), %%xmm8")

GEN_test_RandM(VRSQRTPS_256,
               "vrsqrtps %%ymm6, %%ymm8",
               "vrsqrtps (%%rax), %%ymm8")

GEN_test_RandM(VMOVDQU_GtoE_256,
               "vmovdqu %%ymm9, %%ymm6",
               "vmovdqu %%ymm7, (%%rax)")

GEN_test_RandM(VCVTPS2PD_256,
               "vcvtps2pd %%xmm9, %%ymm6",
               "vcvtps2pd (%%rax), %%ymm7")

GEN_test_RandM(VCVTTPS2DQ_128,
               "vcvttps2dq %%xmm9, %%xmm6",
               "vcvttps2dq (%%rax), %%xmm7")

GEN_test_RandM(VCVTTPS2DQ_256,
               "vcvttps2dq %%ymm9, %%ymm6",
               "vcvttps2dq (%%rax), %%ymm7")

GEN_test_RandM(VCVTDQ2PS_128,
               "vcvtdq2ps %%xmm9, %%xmm6",
               "vcvtdq2ps (%%rax), %%xmm7")

GEN_test_RandM(VCVTDQ2PS_256,
               "vcvtdq2ps %%ymm9, %%ymm6",
               "vcvtdq2ps (%%rax), %%ymm7")

/* The x/y suffixes disambiguate the 128-bit and 256-bit source forms
   of the PD->DQ conversions in AT&T syntax. */
GEN_test_RandM(VCVTTPD2DQ_128,
               "vcvttpd2dqx %%xmm9, %%xmm6",
               "vcvttpd2dqx (%%rax), %%xmm7")

GEN_test_RandM(VCVTTPD2DQ_256,
               "vcvttpd2dqy %%ymm9, %%xmm6",
               "vcvttpd2dqy (%%rax), %%xmm7")

GEN_test_RandM(VCVTPD2DQ_128,
               "vcvtpd2dqx %%xmm9, %%xmm6",
               "vcvtpd2dqx (%%rax), %%xmm7")

GEN_test_RandM(VCVTPD2DQ_256,
               "vcvtpd2dqy %%ymm9, %%xmm6",
               "vcvtpd2dqy (%%rax), %%xmm7")
GEN_test_RandM(VMOVSLDUP_128,
               "vmovsldup %%xmm9, %%xmm6",
               "vmovsldup (%%rax), %%xmm7")

GEN_test_RandM(VMOVSLDUP_256,
               "vmovsldup %%ymm9, %%ymm6",
               "vmovsldup (%%rax), %%ymm7")

GEN_test_RandM(VMOVSHDUP_128,
               "vmovshdup %%xmm9, %%xmm6",
               "vmovshdup (%%rax), %%xmm7")

GEN_test_RandM(VMOVSHDUP_256,
               "vmovshdup %%ymm9, %%ymm6",
               "vmovshdup (%%rax), %%ymm7")

GEN_test_RandM(VPERMILPS_VAR_128,
               "vpermilps %%xmm6, %%xmm8, %%xmm7",
               "vpermilps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPERMILPD_VAR_128,
               "vpermilpd %%xmm6, %%xmm8, %%xmm7",
               "vpermilpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPERMILPS_VAR_256,
               "vpermilps %%ymm6, %%ymm8, %%ymm7",
               "vpermilps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERMILPD_VAR_256,
               "vpermilpd %%ymm6, %%ymm8, %%ymm7",
               "vpermilpd (%%rax), %%ymm8, %%ymm7")

/* Variable-count vector shifts.  The shift count is masked first — in
   r14d for the reg form, or in the Block's u64 field at 128(%rax) for
   the mem form — so the random data yields an in-range count. */
GEN_test_RandM(VPSLLW_128,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%xmm8, %%xmm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSRLW_128,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%xmm8, %%xmm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%xmm8, %%xmm9")

/* NOTE(review): the reg form masks the count with $31 but the mem form
   with $15; psraw saturates for counts > 15, so both are still
   deterministic — confirm the asymmetry is intentional. */
GEN_test_RandM(VPSRAW_128,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%xmm8, %%xmm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSLLD_128,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%xmm8, %%xmm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSRLD_128,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%xmm8, %%xmm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSRAD_128,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%xmm8, %%xmm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSLLQ_128,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%xmm8, %%xmm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%xmm8, %%xmm9")

GEN_test_RandM(VPSRLQ_128,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%xmm8, %%xmm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%xmm8, %%xmm9")

/* VROUNDPS/VROUNDPD over rounding-mode immediates 0..4. */
GEN_test_RandM(VROUNDPS_128_0x0,
               "vroundps $0x0, %%xmm8, %%xmm9",
               "vroundps $0x0, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPS_128_0x1,
               "vroundps $0x1, %%xmm8, %%xmm9",
               "vroundps $0x1, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPS_128_0x2,
               "vroundps $0x2, %%xmm8, %%xmm9",
               "vroundps $0x2, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPS_128_0x3,
               "vroundps $0x3, %%xmm8, %%xmm9",
               "vroundps $0x3, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPS_128_0x4,
               "vroundps $0x4, %%xmm8, %%xmm9",
               "vroundps $0x4, (%%rax), %%xmm9")

GEN_test_RandM(VROUNDPS_256_0x0,
               "vroundps $0x0, %%ymm8, %%ymm9",
               "vroundps $0x0, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPS_256_0x1,
               "vroundps $0x1, %%ymm8, %%ymm9",
               "vroundps $0x1, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPS_256_0x2,
               "vroundps $0x2, %%ymm8, %%ymm9",
               "vroundps $0x2, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPS_256_0x3,
               "vroundps $0x3, %%ymm8, %%ymm9",
               "vroundps $0x3, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPS_256_0x4,
               "vroundps $0x4, %%ymm8, %%ymm9",
               "vroundps $0x4, (%%rax), %%ymm9")

GEN_test_RandM(VROUNDPD_128_0x0,
               "vroundpd $0x0, %%xmm8, %%xmm9",
               "vroundpd $0x0, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPD_128_0x1,
               "vroundpd $0x1, %%xmm8, %%xmm9",
               "vroundpd $0x1, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPD_128_0x2,
               "vroundpd $0x2, %%xmm8, %%xmm9",
               "vroundpd $0x2, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPD_128_0x3,
               "vroundpd $0x3, %%xmm8, %%xmm9",
               "vroundpd $0x3, (%%rax), %%xmm9")
GEN_test_RandM(VROUNDPD_128_0x4,
               "vroundpd $0x4, %%xmm8, %%xmm9",
               "vroundpd $0x4, (%%rax), %%xmm9")

GEN_test_RandM(VROUNDPD_256_0x0,
               "vroundpd $0x0, %%ymm8, %%ymm9",
               "vroundpd $0x0, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPD_256_0x1,
               "vroundpd $0x1, %%ymm8, %%ymm9",
               "vroundpd $0x1, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPD_256_0x2,
               "vroundpd $0x2, %%ymm8, %%ymm9",
               "vroundpd $0x2, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPD_256_0x3,
               "vroundpd $0x3, %%ymm8, %%ymm9",
               "vroundpd $0x3, (%%rax), %%ymm9")
GEN_test_RandM(VROUNDPD_256_0x4,
               "vroundpd $0x4, %%ymm8, %%ymm9",
               "vroundpd $0x4, (%%rax), %%ymm9")

GEN_test_RandM(VPMADDWD_128,
               "vpmaddwd %%xmm6, %%xmm8, %%xmm7",
               "vpmaddwd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDSUBPS_128,
               "vaddsubps %%xmm6, %%xmm8, %%xmm7",
               "vaddsubps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDSUBPS_256,
               "vaddsubps %%ymm6, %%ymm8, %%ymm7",
               "vaddsubps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VADDSUBPD_128,
               "vaddsubpd %%xmm6, %%xmm8, %%xmm7",
               "vaddsubpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VADDSUBPD_256,
               "vaddsubpd %%ymm6, %%ymm8, %%ymm7",
               "vaddsubpd (%%rax), %%ymm8, %%ymm7")

/* VROUNDSS over rounding-mode immediates (continued in the next
   group). */
GEN_test_RandM(VROUNDSS_0x0,
               "vroundss $0x0, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x0, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSS_0x1,
               "vroundss $0x1, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x1, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSS_0x2,
               "vroundss $0x2, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x2, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSS_0x3,
               "vroundss $0x3, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x3, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSS_0x4,
               "vroundss $0x4, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x4, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSS_0x5,
               "vroundss $0x5, %%xmm8, %%xmm6, %%xmm9",
               "vroundss $0x5, (%%rax), %%xmm6, %%xmm9")

GEN_test_RandM(VROUNDSD_0x0,
               "vroundsd $0x0, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x0, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSD_0x1,
               "vroundsd $0x1, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x1, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSD_0x2,
               "vroundsd $0x2, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x2, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSD_0x3,
               "vroundsd $0x3, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x3, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSD_0x4,
               "vroundsd $0x4, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x4, (%%rax), %%xmm6, %%xmm9")
GEN_test_RandM(VROUNDSD_0x5,
               "vroundsd $0x5, %%xmm8, %%xmm6, %%xmm9",
               "vroundsd $0x5, (%%rax), %%xmm6, %%xmm9")

/* VPTEST/VTESTPS/VTESTPD set flags only; capture RFLAGS into r14 and
   mask with 0x8D5 (the OF/SF/ZF/AF/PF/CF bits) for comparison. */
GEN_test_RandM(VPTEST_128_1,
               "vptest %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vptest (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing. */
GEN_test_RandM(VPTEST_128_2,
               "vmovups %%xmm6, %%xmm8;"
               "vptest %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%xmm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "vxorpd %%xmm8,%%xmm7,%%xmm8;"
               "vptest (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VPTEST_256_1,
               "vptest %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vptest (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing.  The stack bounce widens the all-ones xmm7 (from
   vcmpeqpd) to all-ones ymm7 by storing it twice and reloading
   256 bits. */
GEN_test_RandM(VPTEST_256_2,
               "vmovups %%ymm6, %%ymm8;"
               "vptest %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%ymm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "subq $1024, %%rsp;"
               "vmovups %%xmm7,512(%%rsp);"
               "vmovups %%xmm7,528(%%rsp);"
               "vmovups 512(%%rsp), %%ymm7;"
               "addq $1024, %%rsp;"
               "vxorpd %%ymm8,%%ymm7,%%ymm8;"
               "vptest (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")


/* VTESTPS/VTESTPD: test once with all-0 operands, once with
   one all-0s and one all 1s, and once with random data. */

GEN_test_RandM(VTESTPS_128_1,
               "vtestps %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestps (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing. */
GEN_test_RandM(VTESTPS_128_2,
               "vmovups %%xmm6, %%xmm8;"
               "vtestps %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%xmm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "vxorpd %%xmm8,%%xmm7,%%xmm8;"
               "vtestps (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPS_128_3,
               "vtestps %%xmm8, %%xmm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestps (%%rax), %%xmm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPS_256_1,
               "vtestps %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestps (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing. */
GEN_test_RandM(VTESTPS_256_2,
               "vmovups %%ymm6, %%ymm8;"
               "vtestps %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%ymm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "subq $1024, %%rsp;"
               "vmovups %%xmm7,512(%%rsp);"
               "vmovups %%xmm7,528(%%rsp);"
               "vmovups 512(%%rsp), %%ymm7;"
               "addq $1024, %%rsp;"
               "vxorpd %%ymm8,%%ymm7,%%ymm8;"
               "vtestps (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPS_256_3,
               "vtestps %%ymm8, %%ymm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestps (%%rax), %%ymm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPD_128_1,
               "vtestpd %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestpd (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing. */
GEN_test_RandM(VTESTPD_128_2,
               "vmovups %%xmm6, %%xmm8;"
               "vtestpd %%xmm6, %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%xmm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "vxorpd %%xmm8,%%xmm7,%%xmm8;"
               "vtestpd (%%rax), %%xmm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPD_128_3,
               "vtestpd %%xmm8, %%xmm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestpd (%%rax), %%xmm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPD_256_1,
               "vtestpd %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestpd (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

/* Here we ignore the boilerplate-supplied data and try to do
   x AND x and x AND NOT x.  Not a great test but better
   than nothing. */
GEN_test_RandM(VTESTPD_256_2,
               "vmovups %%ymm6, %%ymm8;"
               "vtestpd %%ymm6, %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vmovups (%%rax), %%ymm8;"
               "vcmpeqpd %%xmm8,%%xmm8,%%xmm7;"
               "subq $1024, %%rsp;"
               "vmovups %%xmm7,512(%%rsp);"
               "vmovups %%xmm7,528(%%rsp);"
               "vmovups 512(%%rsp), %%ymm7;"
               "addq $1024, %%rsp;"
               "vxorpd %%ymm8,%%ymm7,%%ymm8;"
               "vtestpd (%%rax), %%ymm8; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VTESTPD_256_3,
               "vtestpd %%ymm8, %%ymm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14",
               "vtestpd (%%rax), %%ymm9; "
               "pushfq; popq %%r14; andq $0x8D5, %%r14")

GEN_test_RandM(VBLENDVPS_128,
               "vblendvps %%xmm9, %%xmm6, %%xmm8, %%xmm7",
               "vblendvps %%xmm9, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VBLENDVPS_256,
               "vblendvps %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vblendvps %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VBLENDVPD_128,
               "vblendvpd %%xmm9, %%xmm6, %%xmm8, %%xmm7",
               "vblendvpd %%xmm9, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VBLENDVPD_256,
               "vblendvpd %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vblendvpd %%ymm9, (%%rax), %%ymm8, %%ymm7")


GEN_test_RandM(VHADDPS_128,
               "vhaddps %%xmm6, %%xmm8, %%xmm7",
               "vhaddps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VHADDPS_256,
               "vhaddps %%ymm6, %%ymm8, %%ymm7",
               "vhaddps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VHADDPD_128,
               "vhaddpd %%xmm6, %%xmm8, %%xmm7",
               "vhaddpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VHADDPD_256,
               "vhaddpd %%ymm6, %%ymm8, %%ymm7",
               "vhaddpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VHSUBPS_128,
               "vhsubps %%xmm6, %%xmm8, %%xmm7",
               "vhsubps (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VHSUBPS_256,
               "vhsubps %%ymm6, %%ymm8, %%ymm7",
               "vhsubps (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VHSUBPD_128,
               "vhsubpd %%xmm6, %%xmm8, %%xmm7",
               "vhsubpd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VHSUBPD_256,
               "vhsubpd %%ymm6, %%ymm8, %%ymm7",
               "vhsubpd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VEXTRACTPS_0x0,
               "vextractps $0, %%xmm8, %%r14d",
               "vextractps $0, %%xmm8, (%%rax)")

GEN_test_RandM(VEXTRACTPS_0x1,
               "vextractps $1, %%xmm8, %%r14d",
               "vextractps $1, %%xmm8, (%%rax)")

GEN_test_RandM(VEXTRACTPS_0x2,
               "vextractps $2, %%xmm8, %%r14d",
               "vextractps $2, %%xmm8, (%%rax)")

GEN_test_RandM(VEXTRACTPS_0x3,
               "vextractps $3, %%xmm8, %%r14d",
               "vextractps $3, %%xmm8, (%%rax)")

/* VLDDQU loads from rax+1, i.e. a deliberately misaligned address. */
GEN_test_Monly(VLDDQU_128,
               "vlddqu 1(%%rax), %%xmm8")

GEN_test_Monly(VLDDQU_256,
               "vlddqu 1(%%rax), %%ymm8")

GEN_test_Monly(VMOVNTDQA_128,
               "vmovntdqa (%%rax), %%xmm9")

/* VMASKMOVDQU stores through an implicit rdi operand, so rax (the
   block pointer) is swapped into rdi around the instruction. */
GEN_test_Monly(VMASKMOVDQU_128,
               "xchgq %%rax, %%rdi;"
               "vmaskmovdqu %%xmm8, %%xmm9;"
               "xchgq %%rax, %%rdi")

GEN_test_Ronly(VMOVMSKPD_128,
               "vmovmskpd %%xmm9, %%r14d")

GEN_test_Ronly(VMOVMSKPD_256,
               "vmovmskpd %%ymm9, %%r14d")

GEN_test_Ronly(VMOVMSKPS_128,
               "vmovmskps %%xmm9, %%r14d")

GEN_test_Ronly(VMOVMSKPS_256,
               "vmovmskps %%ymm9, %%r14d")

GEN_test_Monly(VMOVNTPD_128,
               "vmovntpd %%xmm9, (%%rax)")

GEN_test_Monly(VMOVNTPD_256,
               "vmovntpd %%ymm9, (%%rax)")

GEN_test_Monly(VMOVNTPS_128,
               "vmovntps %%xmm9, (%%rax)")

GEN_test_Monly(VMOVNTPS_256,
               "vmovntps %%ymm9, (%%rax)")

GEN_test_RandM(VPACKSSWB_128,
               "vpacksswb %%xmm6, %%xmm8, %%xmm7",
               "vpacksswb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPAVGB_128,
               "vpavgb %%xmm6, %%xmm8, %%xmm7",
               "vpavgb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPAVGW_128,
               "vpavgw %%xmm6, %%xmm8, %%xmm7",
               "vpavgw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDSB_128,
               "vpaddsb %%xmm6, %%xmm8, %%xmm7",
               "vpaddsb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPADDSW_128,
               "vpaddsw %%xmm6, %%xmm8, %%xmm7",
               "vpaddsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHADDW_128,
               "vphaddw %%xmm6, %%xmm8, %%xmm7",
               "vphaddw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHADDD_128,
               "vphaddd %%xmm6, %%xmm8, %%xmm7",
               "vphaddd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHADDSW_128,
               "vphaddsw %%xmm6, %%xmm8, %%xmm7",
               "vphaddsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMADDUBSW_128,
               "vpmaddubsw %%xmm6, %%xmm8, %%xmm7",
               "vpmaddubsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHSUBW_128,
               "vphsubw %%xmm6, %%xmm8, %%xmm7",
               "vphsubw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHSUBD_128,
               "vphsubd %%xmm6, %%xmm8, %%xmm7",
               "vphsubd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPHSUBSW_128,
               "vphsubsw %%xmm6, %%xmm8, %%xmm7",
               "vphsubsw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPABSB_128,
               "vpabsb %%xmm6, %%xmm7",
               "vpabsb (%%rax), %%xmm7")

GEN_test_RandM(VPABSW_128,
               "vpabsw %%xmm6, %%xmm7",
               "vpabsw (%%rax), %%xmm7")

GEN_test_RandM(VPMOVSXBQ_128,
               "vpmovsxbq %%xmm6, %%xmm8",
               "vpmovsxbq (%%rax), %%xmm8")

GEN_test_RandM(VPMOVSXWQ_128,
               "vpmovsxwq %%xmm6, %%xmm8",
               "vpmovsxwq (%%rax), %%xmm8")

GEN_test_RandM(VPACKUSDW_128,
               "vpackusdw %%xmm6, %%xmm8, %%xmm7",
               "vpackusdw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMOVZXBQ_128,
               "vpmovzxbq %%xmm6, %%xmm8",
               "vpmovzxbq (%%rax), %%xmm8")

GEN_test_RandM(VPMOVZXWQ_128,
               "vpmovzxwq %%xmm6, %%xmm8",
               "vpmovzxwq (%%rax), %%xmm8")

GEN_test_RandM(VPMOVZXDQ_128,
               "vpmovzxdq %%xmm6, %%xmm8",
               "vpmovzxdq (%%rax), %%xmm8")

/* VMPSADBW over all eight imm8 block-select values (continued in the
   next group). */
GEN_test_RandM(VMPSADBW_128_0x0,
               "vmpsadbw $0, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $0, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x1,
               "vmpsadbw $1, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $1, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x2,
               "vmpsadbw $2, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $2, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x3,
               "vmpsadbw $3, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $3, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x4,
               "vmpsadbw $4, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $4, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x5,
               "vmpsadbw $5, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $5, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x6,
               "vmpsadbw $6, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $6, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VMPSADBW_128_0x7,
               "vmpsadbw $7, %%xmm6, %%xmm8, %%xmm7",
               "vmpsadbw $7, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VMOVDDUP_YMMorMEM256_to_YMM,
               "vmovddup %%ymm8, %%ymm7",
               "vmovddup (%%rax), %%ymm9")

GEN_test_Monly(VMOVLPS_128_M64_XMM_XMM, "vmovlps (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VMOVLPS_128_XMM_M64, "vmovlps %%xmm7, (%%rax)")

GEN_test_RandM(VRCPSS_128,
               "vrcpss %%xmm6, %%xmm8, %%xmm7",
               "vrcpss (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VRCPPS_128,
               "vrcpps %%xmm6, %%xmm8",
               "vrcpps (%%rax), %%xmm8")

GEN_test_RandM(VRCPPS_256,
               "vrcpps %%ymm6, %%ymm8",
               "vrcpps (%%rax), %%ymm8")

GEN_test_RandM(VPSADBW_128,
               "vpsadbw %%xmm6, %%xmm8, %%xmm7",
               "vpsadbw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSIGNB_128,
               "vpsignb %%xmm6, %%xmm8, %%xmm7",
               "vpsignb (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSIGNW_128,
               "vpsignw %%xmm6, %%xmm8, %%xmm7",
               "vpsignw (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSIGND_128,
               "vpsignd %%xmm6, %%xmm8, %%xmm7",
               "vpsignd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPMULHRSW_128,
               "vpmulhrsw %%xmm6, %%xmm8, %%xmm7",
               "vpmulhrsw (%%rax), %%xmm8, %%xmm7")

GEN_test_Monly(VBROADCASTF128,
               "vbroadcastf128 (%%rax), %%ymm9")

/* VPEXTRW: extract the imm8-selected word of xmm7 into r14d (reg
   form) or store it to memory (mem form), for all eight indices. */
GEN_test_RandM(VPEXTRW_128_0x0,
               "vpextrw $0x0, %%xmm7, %%r14d",
               "vpextrw $0x0, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x1,
               "vpextrw $0x1, %%xmm7, %%r14d",
               "vpextrw $0x1, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x2,
               "vpextrw $0x2, %%xmm7, %%r14d",
               "vpextrw $0x2, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x3,
               "vpextrw $0x3, %%xmm7, %%r14d",
               "vpextrw $0x3, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x4,
               "vpextrw $0x4, %%xmm7, %%r14d",
               "vpextrw $0x4, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x5,
               "vpextrw $0x5, %%xmm7, %%r14d",
               "vpextrw $0x5, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x6,
               "vpextrw $0x6, %%xmm7, %%r14d",
               "vpextrw $0x6, %%xmm7, (%%rax)")
GEN_test_RandM(VPEXTRW_128_0x7,
               "vpextrw $0x7, %%xmm7, %%r14d",
               "vpextrw $0x7, %%xmm7, (%%rax)")

GEN_test_RandM(VAESENC,
               "vaesenc %%xmm6, %%xmm8, %%xmm7",
               "vaesenc (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VAESENCLAST,
               "vaesenclast %%xmm6, %%xmm8, %%xmm7",
               "vaesenclast (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VAESDEC,
               "vaesdec %%xmm6, %%xmm8, %%xmm7",
               "vaesdec (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VAESDECLAST,
               "vaesdeclast %%xmm6, %%xmm8, %%xmm7",
               "vaesdeclast (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VAESIMC,
               "vaesimc %%xmm6, %%xmm7",
               "vaesimc (%%rax), %%xmm7")

GEN_test_RandM(VAESKEYGENASSIST_0x00,
               "vaeskeygenassist $0x00, %%xmm6, %%xmm7",
               "vaeskeygenassist $0x00, (%%rax), %%xmm7")
GEN_test_RandM(VAESKEYGENASSIST_0x31,
               "vaeskeygenassist $0x31, %%xmm6, %%xmm7",
               "vaeskeygenassist $0x31, (%%rax), %%xmm7")
GEN_test_RandM(VAESKEYGENASSIST_0xB2,
               "vaeskeygenassist $0xb2, %%xmm6, %%xmm7",
               "vaeskeygenassist $0xb2, (%%rax), %%xmm7")
GEN_test_RandM(VAESKEYGENASSIST_0xFF,
               "vaeskeygenassist $0xFF, %%xmm6, %%xmm7",
               "vaeskeygenassist $0xFF, (%%rax), %%xmm7")

/* VPCLMULQDQ over the four quadword-select imm8 values plus one with
   the reserved bits set. */
GEN_test_RandM(VPCLMULQDQ_0x00,
               "vpclmulqdq $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpclmulqdq $0x00, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPCLMULQDQ_0x01,
               "vpclmulqdq $0x01, %%xmm6, %%xmm8, %%xmm7",
               "vpclmulqdq $0x01, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPCLMULQDQ_0x10,
               "vpclmulqdq $0x10, %%xmm6, %%xmm8, %%xmm7",
               "vpclmulqdq $0x10, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPCLMULQDQ_0x11,
               "vpclmulqdq $0x11, %%xmm6, %%xmm8, %%xmm7",
               "vpclmulqdq $0x11, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPCLMULQDQ_0xFF,
               "vpclmulqdq $0xFF, %%xmm6, %%xmm8, %%xmm7",
               "vpclmulqdq $0xFF, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VCMPSS_128_0x9,
               "vcmpss $0x9, %%xmm6, %%xmm8, %%xmm7",
               "vcmpss $0x9, (%%rax), %%xmm8, %%xmm7")

/* VMASKMOV load forms: the second load in each test uses an all-zero
   mask at address rax*5 — presumably to check that a fully masked-out
   access neither loads nor faults. */
GEN_test_Monly(VMASKMOVPS_128_LoadForm,
               "vmaskmovps (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vmaskmovps (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VMASKMOVPS_256_LoadForm,
               "vmaskmovps (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vmaskmovps (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VMASKMOVPD_128_LoadForm,
               "vmaskmovpd (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vmaskmovpd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VMASKMOVPD_256_LoadForm,
               "vmaskmovpd (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vmaskmovpd (%%rax,%%rax,4), %%ymm6, %%ymm9")

/* VMASKMOV store forms: likewise, the second store uses an all-zero
   mask at address rax*5. */
GEN_test_Monly(VMASKMOVPS_128_StoreForm,
               "vmaskmovps %%xmm8, %%xmm7, (%%rax);"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vmaskmovps %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VMASKMOVPS_256_StoreForm,
               "vmaskmovps %%ymm8, %%ymm7, 
(%%rax);" 2227 "vxorps %%ymm6, %%ymm6, %%ymm6;" 2228 "vmaskmovps %%ymm9, %%ymm6, (%%rax,%%rax,4)") 2229 2230 GEN_test_Monly(VMASKMOVPD_128_StoreForm, 2231 "vmaskmovpd %%xmm8, %%xmm7, (%%rax);" 2232 "vxorpd %%xmm6, %%xmm6, %%xmm6;" 2233 "vmaskmovpd %%xmm9, %%xmm6, (%%rax,%%rax,4)") 2234 2235 GEN_test_Monly(VMASKMOVPD_256_StoreForm, 2236 "vmaskmovpd %%ymm8, %%ymm7, (%%rax);" 2237 "vxorpd %%ymm6, %%ymm6, %%ymm6;" 2238 "vmaskmovpd %%ymm9, %%ymm6, (%%rax,%%rax,4)") 2239 2240 /* Comment duplicated above, for convenient reference: 2241 Allowed operands in test insns: 2242 Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14. 2243 Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14. 2244 Imm8 etc fields are also allowed, where they make sense. 2245 Both forms may use ymm0 as scratch. Mem form may also use 2246 ymm6 as scratch. 2247 */ 2248 2249 #define N_DEFAULT_ITERS 3 2250 2251 // Do the specified test some number of times 2252 #define DO_N(_iters, _testfn) \ 2253 do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0) 2254 2255 // Do the specified test the default number of times 2256 #define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn) 2257 2258 2259 int main ( void ) 2260 { 2261 DO_D( VMOVUPD_EtoG_256 ); 2262 DO_D( VMOVUPD_GtoE_256 ); 2263 DO_D( VPSUBW_128 ); 2264 DO_D( VPSUBQ_128 ); 2265 DO_D( VPADDQ_128 ); 2266 DO_D( VPINSRQ_128 ); 2267 DO_D( VUCOMISS_128 ); 2268 DO_D( VUCOMISD_128 ); 2269 DO_D( VCVTPS2PD_128 ); 2270 DO_D( VANDNPD_128 ); 2271 DO_D( VORPD_128 ); 2272 DO_D( VXORPD_128 ); 2273 DO_D( VXORPS_128 ); 2274 DO_D( VMULSD_128 ); 2275 DO_D( VADDSD_128 ); 2276 DO_D( VMINSD_128 ); 2277 DO_D( VSUBSD_128 ); 2278 DO_D( VDIVSD_128 ); 2279 DO_D( VMAXSD_128 ); 2280 DO_D( VPSHUFD_0x39_128 ); 2281 DO_D( VPCMPEQD_128 ); 2282 DO_D( VPEXTRD_128_0x3 ); 2283 DO_D( VPEXTRD_128_0x0 ); 2284 DO_D( VINSERTF128_0x0 ); 2285 DO_D( VINSERTF128_0x1 ); 2286 DO_D( VEXTRACTF128_0x0 ); 2287 DO_D( VEXTRACTF128_0x1 ); 2288 DO_D( VCVTPD2PS_128 ); 2289 /* Test all CMPSS variants; this 
code is tricky. */ 2290 DO_D( VCMPSS_128_0x0 ); 2291 DO_D( VCMPSS_128_0x1 ); 2292 DO_D( VCMPSS_128_0x2 ); 2293 DO_D( VCMPSS_128_0x3 ); 2294 DO_D( VCMPSS_128_0x4 ); 2295 DO_D( VCMPSS_128_0x5 ); 2296 DO_D( VCMPSS_128_0x6 ); 2297 DO_D( VCMPSS_128_0x7 ); 2298 DO_D( VCMPSS_128_0x8 ); 2299 DO_D( VCMPSS_128_0xA ); 2300 DO_D( VCMPSS_128_0xC ); 2301 DO_D( VCMPSS_128_0xD ); 2302 DO_D( VCMPSS_128_0xE ); 2303 DO_D( VCMPSS_128_0x10 ); 2304 DO_D( VCMPSS_128_0x11 ); 2305 DO_D( VCMPSS_128_0x12 ); 2306 DO_D( VCMPSS_128_0x13 ); 2307 DO_D( VCMPSS_128_0x14 ); 2308 DO_D( VCMPSS_128_0x15 ); 2309 DO_D( VCMPSS_128_0x16 ); 2310 DO_D( VCMPSS_128_0x17 ); 2311 DO_D( VCMPSS_128_0x18 ); 2312 DO_D( VCMPSS_128_0x19 ); 2313 DO_D( VCMPSS_128_0x1A ); 2314 DO_D( VCMPSS_128_0x1C ); 2315 DO_D( VCMPSS_128_0x1D ); 2316 DO_D( VCMPSS_128_0x1E ); 2317 DO_D( VMOVDDUP_XMMorMEM64_to_XMM ); 2318 DO_D( VMOVD_IREGorMEM32_to_XMM ); 2319 DO_D( VMOVQ_XMM_MEM64 ); 2320 DO_D( VMOVDQA_GtoE_256 ); 2321 DO_D( VMOVDQA_GtoE_128 ); 2322 DO_D( VMOVDQU_GtoE_128 ); 2323 DO_D( VMOVDQA_EtoG_256 ); 2324 DO_D( VMOVDQA_EtoG_128 ); 2325 DO_D( VMOVDQU_EtoG_128 ); 2326 DO_D( VMOVAPD_GtoE_128 ); 2327 DO_D( VMOVAPD_GtoE_256 ); 2328 DO_D( VMOVAPS_GtoE_128 ); 2329 DO_D( VMOVAPS_GtoE_256 ); 2330 DO_D( VMOVAPS_EtoG_128 ); 2331 DO_D( VMOVAPD_EtoG_256 ); 2332 DO_D( VMOVAPD_EtoG_128 ); 2333 DO_D( VMOVUPD_GtoE_128 ); 2334 DO_D( VMOVSS_XMM_M32 ); 2335 DO_D( VMOVSD_XMM_M64 ); 2336 DO_D( VMOVSS_M64_XMM ); 2337 DO_D( VMOVSD_M64_XMM ); 2338 DO_D( VINSERTPS_0x39_128 ); 2339 DO_D( VPUNPCKLDQ_128 ); 2340 DO_D( VPACKSSDW_128 ); 2341 DO_D( VPADDW_128 ); 2342 DO_D( VPSRLW_0x05_128 ); 2343 DO_D( VPSLLW_0x05_128 ); 2344 DO_D( VPUNPCKLQDQ_128 ); 2345 DO_D( VPINSRD_128 ); 2346 DO_D( VMOVD_XMM_to_MEM32 ); 2347 DO_D( VPANDN_128 ); 2348 DO_D( VPSLLDQ_0x05_128 ); 2349 DO_D( VPSRLDQ_0x05_128 ); 2350 DO_D( VPSUBUSB_128 ); 2351 DO_D( VPSUBSB_128 ); 2352 DO_D( VPSLLD_0x05_128 ); 2353 DO_D( VPSRLD_0x05_128 ); 2354 DO_D( VPSRAD_0x05_128 ); 2355 DO_D( VPUNPCKLWD_128 ); 
2356 DO_D( VPUNPCKHWD_128 ); 2357 DO_D( VPADDUSB_128 ); 2358 DO_D( VPMULHUW_128 ); 2359 DO_D( VPADDUSW_128 ); 2360 DO_D( VPMULLW_128 ); 2361 DO_D( VPSHUFHW_0x39_128 ); 2362 DO_D( VPSHUFLW_0x39_128 ); 2363 DO_D( VCVTPS2DQ_128 ); 2364 DO_D( VSUBPS_128 ); 2365 DO_D( VADDPS_128 ); 2366 DO_D( VMULPS_128 ); 2367 DO_D( VMAXPS_128 ); 2368 DO_D( VMINPS_128 ); 2369 DO_D( VSHUFPS_0x39_128 ); 2370 DO_D( VPCMPEQB_128 ); 2371 DO_D( VMOVHPD_128_StoreForm ); 2372 DO_D( VPAND_128 ); 2373 DO_D( VPMOVMSKB_128 ); 2374 DO_D( VCVTTSS2SI_64 ); 2375 DO_D( VPACKUSWB_128 ); 2376 DO_D( VCVTSS2SD_128 ); 2377 DO_D( VCVTSD2SS_128 ); 2378 DO_D( VMOVD_XMM_to_IREG32 ); 2379 DO_D( VPCMPESTRM_0x45_128 ); 2380 DO_D( VMOVQ_IREGorMEM64_to_XMM ); 2381 DO_D( VMOVUPS_XMM_to_XMMorMEM ); 2382 DO_D( VMOVNTDQ_128 ); 2383 DO_D( VMOVLHPS_128 ); 2384 DO_D( VPABSD_128 ); 2385 DO_D( VMOVHLPS_128 ); 2386 DO_D( VMOVQ_XMM_to_IREG64 ); 2387 DO_D( VMOVQ_XMMorMEM64_to_XMM ); 2388 DO_D( VCVTTSS2SI_32 ); 2389 DO_D( VPUNPCKLBW_128 ); 2390 DO_D( VPUNPCKHBW_128 ); 2391 DO_D( VMULSS_128 ); 2392 DO_D( VSUBSS_128 ); 2393 DO_D( VADDSS_128 ); 2394 DO_D( VDIVSS_128 ); 2395 DO_D( VUNPCKLPS_128 ); 2396 DO_D( VCVTSI2SS_128 ); 2397 DO_D( VANDPS_128 ); 2398 DO_D( VMINSS_128 ); 2399 DO_D( VMAXSS_128 ); 2400 DO_D( VANDNPS_128 ); 2401 DO_D( VORPS_128 ); 2402 DO_D( VSQRTSD_128 ); 2403 /* Test all CMPSD variants; this code is tricky. 
*/ 2404 DO_D( VCMPSD_128_0x0 ); 2405 DO_D( VCMPSD_128_0x1 ); 2406 DO_D( VCMPSD_128_0x2 ); 2407 DO_D( VCMPSD_128_0x3 ); 2408 DO_D( VCMPSD_128_0x4 ); 2409 DO_D( VCMPSD_128_0x5 ); 2410 DO_D( VCMPSD_128_0x6 ); 2411 DO_D( VCMPSD_128_0x7 ); 2412 DO_D( VCMPSD_128_0x8 ); 2413 DO_D( VCMPSD_128_0xA ); 2414 DO_D( VCMPSD_128_0xC ); 2415 DO_D( VCMPSD_128_0xD ); 2416 DO_D( VCMPSD_128_0xE ); 2417 DO_D( VCMPSD_128_0x10 ); 2418 DO_D( VCMPSD_128_0x11 ); 2419 DO_D( VCMPSD_128_0x12 ); 2420 DO_D( VCMPSD_128_0x13 ); 2421 DO_D( VCMPSD_128_0x14 ); 2422 DO_D( VCMPSD_128_0x15 ); 2423 DO_D( VCMPSD_128_0x16 ); 2424 DO_D( VCMPSD_128_0x17 ); 2425 DO_D( VCMPSD_128_0x18 ); 2426 DO_D( VCMPSD_128_0x19 ); 2427 DO_D( VCMPSD_128_0x1A ); 2428 DO_D( VCMPSD_128_0x1C ); 2429 DO_D( VCMPSD_128_0x1D ); 2430 DO_D( VCMPSD_128_0x1E ); 2431 DO_D( VPSHUFB_128 ); 2432 DO_D( VCVTTSD2SI_32 ); 2433 DO_D( VCVTTSD2SI_64 ); 2434 DO_D( VCVTSI2SS_64 ); 2435 DO_D( VCVTSI2SD_64 ); 2436 DO_D( VCVTSI2SD_32 ); 2437 DO_D( VPOR_128 ); 2438 DO_D( VPXOR_128 ); 2439 DO_D( VPSUBB_128 ); 2440 DO_D( VPSUBD_128 ); 2441 DO_D( VPADDD_128 ); 2442 DO_D( VPMOVZXBW_128 ); 2443 DO_D( VPMOVZXWD_128 ); 2444 DO_D( VPBLENDVB_128 ); 2445 DO_D( VPMINSD_128 ); 2446 DO_D( VPMAXSD_128 ); 2447 DO_D( VANDPD_128 ); 2448 DO_D( VMULPD_256 ); 2449 DO_D( VMOVUPD_EtoG_128 ); 2450 DO_D( VADDPD_256 ); 2451 DO_D( VSUBPD_256 ); 2452 DO_D( VDIVPD_256 ); 2453 DO_D( VPCMPEQQ_128 ); 2454 DO_D( VSUBPD_128 ); 2455 DO_D( VADDPD_128 ); 2456 DO_D( VUNPCKLPD_128 ); 2457 DO_D( VUNPCKHPD_128 ); 2458 DO_D( VUNPCKHPS_128 ); 2459 DO_D( VMOVUPS_EtoG_128 ); 2460 DO_D( VADDPS_256 ); 2461 DO_D( VSUBPS_256 ); 2462 DO_D( VMULPS_256 ); 2463 DO_D( VDIVPS_256 ); 2464 DO_D( VPCMPGTQ_128 ); 2465 DO_D( VPEXTRQ_128_0x0 ); 2466 DO_D( VPEXTRQ_128_0x1 ); 2467 DO_D( VPSRLQ_0x05_128 ); 2468 DO_D( VPMULUDQ_128 ); 2469 DO_D( VPSLLQ_0x05_128 ); 2470 DO_D( VPMAXUD_128 ); 2471 DO_D( VPMINUD_128 ); 2472 DO_D( VPMULLD_128 ); 2473 DO_D( VPMAXUW_128 ); 2474 DO_D( VPEXTRW_128_EregOnly_toG_0x0 ); 2475 
DO_D( VPEXTRW_128_EregOnly_toG_0x7 ); 2476 DO_D( VPMINUW_128 ); 2477 DO_D( VPHMINPOSUW_128 ); 2478 DO_D( VPMAXSW_128 ); 2479 DO_D( VPMINSW_128 ); 2480 DO_D( VPMAXUB_128 ); 2481 DO_D( VPEXTRB_GtoE_128_0x0 ); 2482 DO_D( VPEXTRB_GtoE_128_0x1 ); 2483 DO_D( VPEXTRB_GtoE_128_0x2 ); 2484 DO_D( VPEXTRB_GtoE_128_0x3 ); 2485 DO_D( VPEXTRB_GtoE_128_0x4 ); 2486 DO_D( VPEXTRB_GtoE_128_0x9 ); 2487 DO_D( VPEXTRB_GtoE_128_0xE ); 2488 DO_D( VPEXTRB_GtoE_128_0xF ); 2489 DO_D( VPMINUB_128 ); 2490 DO_D( VPMAXSB_128 ); 2491 DO_D( VPMINSB_128 ); 2492 DO_D( VPERM2F128_0x00 ); 2493 DO_D( VPERM2F128_0xFF ); 2494 DO_D( VPERM2F128_0x30 ); 2495 DO_D( VPERM2F128_0x21 ); 2496 DO_D( VPERM2F128_0x12 ); 2497 DO_D( VPERM2F128_0x03 ); 2498 DO_D( VPERM2F128_0x85 ); 2499 DO_D( VPERM2F128_0x5A ); 2500 DO_D( VPERMILPD_256_0x0 ); 2501 DO_D( VPERMILPD_256_0xF ); 2502 DO_D( VPERMILPD_256_0xA ); 2503 DO_D( VPERMILPD_256_0x5 ); 2504 DO_D( VPERMILPD_128_0x0 ); 2505 DO_D( VPERMILPD_128_0x3 ); 2506 DO_D( VUNPCKLPD_256 ); 2507 DO_D( VUNPCKHPD_256 ); 2508 DO_D( VSHUFPS_0x39_256 ); 2509 DO_D( VUNPCKLPS_256 ); 2510 DO_D( VUNPCKHPS_256 ); 2511 DO_D( VXORPD_256 ); 2512 DO_D( VBROADCASTSD_256 ); 2513 DO_D( VCMPPD_128_0x4 ); 2514 DO_D( VCVTDQ2PD_128 ); 2515 DO_D( VDIVPD_128 ); 2516 DO_D( VANDPD_256 ); 2517 DO_D( VPMOVSXBW_128 ); 2518 DO_D( VPSUBUSW_128 ); 2519 DO_D( VPSUBSW_128 ); 2520 DO_D( VPCMPEQW_128 ); 2521 DO_D( VPADDB_128 ); 2522 DO_D( VMOVAPS_EtoG_256 ); 2523 DO_D( VCVTDQ2PD_256 ); 2524 DO_D( VMOVHPD_128_LoadForm ); 2525 DO_D( VCVTPD2PS_256 ); 2526 DO_D( VPUNPCKHDQ_128 ); 2527 DO_D( VBROADCASTSS_128 ); 2528 DO_D( VPMOVSXDQ_128 ); 2529 DO_D( VPMOVSXWD_128 ); 2530 DO_D( VDIVPS_128 ); 2531 DO_D( VANDPS_256 ); 2532 DO_D( VXORPS_256 ); 2533 DO_D( VORPS_256 ); 2534 DO_D( VANDNPD_256 ); 2535 DO_D( VANDNPS_256 ); 2536 DO_D( VORPD_256 ); 2537 DO_D( VPERMILPS_256_0x0F ); 2538 DO_D( VPERMILPS_256_0xFA ); 2539 DO_D( VPERMILPS_256_0xA3 ); 2540 DO_D( VPERMILPS_256_0x5A ); 2541 DO_D( VPMULHW_128 ); 2542 DO_D( VPUNPCKHQDQ_128 
); 2543 DO_D( VPSRAW_0x05_128 ); 2544 DO_D( VPCMPGTD_128 ); 2545 DO_D( VPMOVZXBD_128 ); 2546 DO_D( VPMOVSXBD_128 ); 2547 DO_D( VPINSRB_128_1of3 ); 2548 DO_D( VPINSRB_128_2of3 ); 2549 DO_D( VPINSRB_128_3of3 ); 2550 DO_D( VCOMISD_128 ); 2551 DO_D( VCOMISS_128 ); 2552 DO_D( VMOVUPS_YMM_to_YMMorMEM ); 2553 DO_D( VDPPD_128_1of4 ); 2554 DO_D( VDPPD_128_2of4 ); 2555 DO_D( VDPPD_128_3of4 ); 2556 DO_D( VDPPD_128_4of4 ); 2557 DO_D( VPINSRW_128_1of4 ); 2558 DO_D( VPINSRW_128_2of4 ); 2559 DO_D( VPINSRW_128_3of4 ); 2560 DO_D( VPINSRW_128_4of4 ); 2561 DO_D( VBROADCASTSS_256 ); 2562 DO_D( VPALIGNR_128_1of3 ); 2563 DO_D( VPALIGNR_128_2of3 ); 2564 DO_D( VPALIGNR_128_3of3 ); 2565 DO_D( VMOVSD_REG_XMM ); 2566 DO_D( VMOVSS_REG_XMM ); 2567 DO_D( VMOVLPD_128_M64_XMM_XMM ); 2568 DO_D( VMOVLPD_128_XMM_M64 ); 2569 DO_D( VSHUFPD_128_1of2 ); 2570 DO_D( VSHUFPD_128_2of2 ); 2571 DO_D( VSHUFPD_256_1of2 ); 2572 DO_D( VSHUFPD_256_2of2 ); 2573 DO_D( VPERMILPS_128_0x00 ); 2574 DO_D( VPERMILPS_128_0xFE ); 2575 DO_D( VPERMILPS_128_0x30 ); 2576 DO_D( VPERMILPS_128_0x21 ); 2577 DO_D( VPERMILPS_128_0xD7 ); 2578 DO_D( VPERMILPS_128_0xB5 ); 2579 DO_D( VPERMILPS_128_0x85 ); 2580 DO_D( VPERMILPS_128_0x29 ); 2581 DO_D( VBLENDPS_128_1of3 ); 2582 DO_D( VBLENDPS_128_2of3 ); 2583 DO_D( VBLENDPS_128_3of3 ); 2584 DO_D( VBLENDPD_128_1of2 ); 2585 DO_D( VBLENDPD_128_2of2 ); 2586 DO_D( VBLENDPD_256_1of3 ); 2587 DO_D( VBLENDPD_256_2of3 ); 2588 DO_D( VBLENDPD_256_3of3 ); 2589 DO_D( VPBLENDW_128_0x00 ); 2590 DO_D( VPBLENDW_128_0xFE ); 2591 DO_D( VPBLENDW_128_0x30 ); 2592 DO_D( VPBLENDW_128_0x21 ); 2593 DO_D( VPBLENDW_128_0xD7 ); 2594 DO_D( VPBLENDW_128_0xB5 ); 2595 DO_D( VPBLENDW_128_0x85 ); 2596 DO_D( VPBLENDW_128_0x29 ); 2597 DO_D( VMOVUPS_EtoG_256 ); 2598 DO_D( VSQRTSS_128 ); 2599 DO_D( VSQRTPS_128 ); 2600 DO_D( VSQRTPS_256 ); 2601 DO_D( VSQRTPD_128 ); 2602 DO_D( VSQRTPD_256 ); 2603 DO_D( VRSQRTSS_128 ); 2604 DO_D( VRSQRTPS_128 ); 2605 DO_D( VRSQRTPS_256 ); 2606 DO_D( VMOVDQU_GtoE_256 ); 2607 DO_D( VCVTPS2PD_256 ); 
2608 DO_D( VCVTTPS2DQ_128 ); 2609 DO_D( VCVTTPS2DQ_256 ); 2610 DO_D( VCVTDQ2PS_128 ); 2611 DO_D( VCVTDQ2PS_256 ); 2612 DO_D( VCVTTPD2DQ_128 ); 2613 DO_D( VCVTTPD2DQ_256 ); 2614 DO_D( VCVTPD2DQ_128 ); 2615 DO_D( VCVTPD2DQ_256 ); 2616 DO_D( VMOVSLDUP_128 ); 2617 DO_D( VMOVSLDUP_256 ); 2618 DO_D( VMOVSHDUP_128 ); 2619 DO_D( VMOVSHDUP_256 ); 2620 DO_D( VPERMILPS_VAR_128 ); 2621 DO_D( VPERMILPD_VAR_128 ); 2622 DO_D( VPERMILPS_VAR_256 ); 2623 DO_D( VPERMILPD_VAR_256 ); 2624 DO_D( VPSLLW_128 ); 2625 DO_D( VPSRLW_128 ); 2626 DO_D( VPSRAW_128 ); 2627 DO_D( VPSLLD_128 ); 2628 DO_D( VPSRLD_128 ); 2629 DO_D( VPSRAD_128 ); 2630 DO_D( VPSLLQ_128 ); 2631 DO_D( VPSRLQ_128 ); 2632 DO_D( VROUNDPS_128_0x0 ); 2633 DO_D( VROUNDPS_128_0x1 ); 2634 DO_D( VROUNDPS_128_0x2 ); 2635 DO_D( VROUNDPS_128_0x3 ); 2636 DO_D( VROUNDPS_128_0x4 ); 2637 DO_D( VROUNDPS_256_0x0 ); 2638 DO_D( VROUNDPS_256_0x1 ); 2639 DO_D( VROUNDPS_256_0x2 ); 2640 DO_D( VROUNDPS_256_0x3 ); 2641 DO_D( VROUNDPS_256_0x4 ); 2642 DO_D( VROUNDPD_128_0x0 ); 2643 DO_D( VROUNDPD_128_0x1 ); 2644 DO_D( VROUNDPD_128_0x2 ); 2645 DO_D( VROUNDPD_128_0x3 ); 2646 DO_D( VROUNDPD_128_0x4 ); 2647 DO_D( VROUNDPD_256_0x0 ); 2648 DO_D( VROUNDPD_256_0x1 ); 2649 DO_D( VROUNDPD_256_0x2 ); 2650 DO_D( VROUNDPD_256_0x3 ); 2651 DO_D( VROUNDPD_256_0x4 ); 2652 DO_D( VROUNDSS_0x0 ); 2653 DO_D( VROUNDSS_0x1 ); 2654 DO_D( VROUNDSS_0x2 ); 2655 DO_D( VROUNDSS_0x3 ); 2656 DO_D( VROUNDSS_0x4 ); 2657 DO_D( VROUNDSS_0x5 ); 2658 DO_D( VROUNDSD_0x0 ); 2659 DO_D( VROUNDSD_0x1 ); 2660 DO_D( VROUNDSD_0x2 ); 2661 DO_D( VROUNDSD_0x3 ); 2662 DO_D( VROUNDSD_0x4 ); 2663 DO_D( VROUNDSD_0x5 ); 2664 DO_D( VPTEST_128_1 ); 2665 DO_D( VPTEST_128_2 ); 2666 DO_D( VPTEST_256_1 ); 2667 DO_D( VPTEST_256_2 ); 2668 DO_D( VTESTPS_128_1 ); 2669 DO_D( VTESTPS_128_2 ); 2670 DO_N( 10, VTESTPS_128_3 ); 2671 DO_D( VTESTPS_256_1 ); 2672 DO_D( VTESTPS_256_2 ); 2673 DO_N( 10, VTESTPS_256_3 ); 2674 DO_D( VTESTPD_128_1 ); 2675 DO_D( VTESTPD_128_2 ); 2676 DO_N( 10, VTESTPD_128_3 ); 2677 DO_D( 
VTESTPD_256_1 ); 2678 DO_D( VTESTPD_256_2 ); 2679 DO_N( 10, VTESTPD_256_3 ); 2680 DO_D( VBLENDVPS_128 ); 2681 DO_D( VBLENDVPS_256 ); 2682 DO_D( VBLENDVPD_128 ); 2683 DO_D( VBLENDVPD_256 ); 2684 DO_D( VPMULDQ_128 ); 2685 DO_D( VCMPPD_256_0x4 ); 2686 DO_D( VCMPPS_128_0x4 ); 2687 DO_D( VCMPPS_256_0x4 ); 2688 DO_D( VPCMPGTB_128 ); 2689 DO_D( VPCMPGTW_128 ); 2690 DO_D( VPMADDWD_128 ); 2691 DO_D( VADDSUBPS_128 ); 2692 DO_D( VADDSUBPS_256 ); 2693 DO_D( VADDSUBPD_128 ); 2694 DO_D( VADDSUBPD_256 ); 2695 DO_D( VCVTSS2SI_64 ); 2696 DO_D( VCVTSS2SI_32 ); 2697 DO_D( VCVTSD2SI_32 ); 2698 DO_D( VCVTSD2SI_64 ); 2699 DO_D( VDPPS_128_1of4 ); 2700 DO_D( VDPPS_128_2of4 ); 2701 DO_D( VDPPS_128_3of4 ); 2702 DO_D( VDPPS_128_4of4 ); 2703 DO_D( VDPPS_256_1of4 ); 2704 DO_D( VDPPS_256_2of4 ); 2705 DO_D( VDPPS_256_3of4 ); 2706 DO_D( VDPPS_256_4of4 ); 2707 DO_D( VHADDPS_128 ); 2708 DO_D( VHADDPS_256 ); 2709 DO_D( VHADDPD_128 ); 2710 DO_D( VHADDPD_256 ); 2711 DO_D( VHSUBPS_128 ); 2712 DO_D( VHSUBPS_256 ); 2713 DO_D( VHSUBPD_128 ); 2714 DO_D( VHSUBPD_256 ); 2715 DO_D( VEXTRACTPS_0x0 ); 2716 DO_D( VEXTRACTPS_0x1 ); 2717 DO_D( VEXTRACTPS_0x2 ); 2718 DO_D( VEXTRACTPS_0x3 ); 2719 DO_D( VLDDQU_128 ); 2720 DO_D( VLDDQU_256 ); 2721 DO_D( VMAXPS_256 ); 2722 DO_D( VMAXPD_128 ); 2723 DO_D( VMAXPD_256 ); 2724 DO_D( VMINPS_256 ); 2725 DO_D( VMINPD_128 ); 2726 DO_D( VMINPD_256 ); 2727 DO_D( VMOVHPS_128_StoreForm ); 2728 DO_D( VMOVNTDQ_256 ); 2729 DO_D( VMOVHPS_128_LoadForm ); 2730 DO_D( VMOVNTDQA_128 ); 2731 DO_D( VMASKMOVDQU_128 ); 2732 DO_D( VMOVMSKPD_128 ); 2733 DO_D( VMOVMSKPD_256 ); 2734 DO_D( VMOVMSKPS_128 ); 2735 DO_D( VMOVMSKPS_256 ); 2736 DO_D( VMOVNTPD_128 ); 2737 DO_D( VMOVNTPD_256 ); 2738 DO_D( VMOVNTPS_128 ); 2739 DO_D( VMOVNTPS_256 ); 2740 DO_D( VPACKSSWB_128 ); 2741 DO_D( VPAVGB_128 ); 2742 DO_D( VPAVGW_128 ); 2743 DO_D( VPADDSB_128 ); 2744 DO_D( VPADDSW_128 ); 2745 DO_D( VPHADDW_128 ); 2746 DO_D( VPHADDD_128 ); 2747 DO_D( VPHADDSW_128 ); 2748 DO_D( VPMADDUBSW_128 ); 2749 DO_D( VPHSUBW_128 ); 
2750 DO_D( VPHSUBD_128 ); 2751 DO_D( VPHSUBSW_128 ); 2752 DO_D( VPABSB_128 ); 2753 DO_D( VPABSW_128 ); 2754 DO_D( VPMOVSXBQ_128 ); 2755 DO_D( VPMOVSXWQ_128 ); 2756 DO_D( VPACKUSDW_128 ); 2757 DO_D( VPMOVZXBQ_128 ); 2758 DO_D( VPMOVZXWQ_128 ); 2759 DO_D( VPMOVZXDQ_128 ); 2760 DO_D( VMPSADBW_128_0x0 ); 2761 DO_D( VMPSADBW_128_0x1 ); 2762 DO_D( VMPSADBW_128_0x2 ); 2763 DO_D( VMPSADBW_128_0x3 ); 2764 DO_D( VMPSADBW_128_0x4 ); 2765 DO_D( VMPSADBW_128_0x5 ); 2766 DO_D( VMPSADBW_128_0x6 ); 2767 DO_D( VMPSADBW_128_0x7 ); 2768 DO_D( VMOVDDUP_YMMorMEM256_to_YMM ); 2769 DO_D( VMOVLPS_128_M64_XMM_XMM ); 2770 DO_D( VMOVLPS_128_XMM_M64 ); 2771 DO_D( VRCPSS_128 ); 2772 DO_D( VRCPPS_128 ); 2773 DO_D( VRCPPS_256 ); 2774 DO_D( VPSADBW_128 ); 2775 DO_D( VPSIGNB_128 ); 2776 DO_D( VPSIGNW_128 ); 2777 DO_D( VPSIGND_128 ); 2778 DO_D( VPMULHRSW_128 ); 2779 DO_D( VBROADCASTF128 ); 2780 DO_D( VPEXTRW_128_0x0 ); 2781 DO_D( VPEXTRW_128_0x1 ); 2782 DO_D( VPEXTRW_128_0x2 ); 2783 DO_D( VPEXTRW_128_0x3 ); 2784 DO_D( VPEXTRW_128_0x4 ); 2785 DO_D( VPEXTRW_128_0x5 ); 2786 DO_D( VPEXTRW_128_0x6 ); 2787 DO_D( VPEXTRW_128_0x7 ); 2788 DO_D( VAESENC ); 2789 DO_D( VAESENCLAST ); 2790 DO_D( VAESDEC ); 2791 DO_D( VAESDECLAST ); 2792 DO_D( VAESIMC ); 2793 DO_D( VAESKEYGENASSIST_0x00 ); 2794 DO_D( VAESKEYGENASSIST_0x31 ); 2795 DO_D( VAESKEYGENASSIST_0xB2 ); 2796 DO_D( VAESKEYGENASSIST_0xFF ); 2797 DO_D( VPCLMULQDQ_0x00 ); 2798 DO_D( VPCLMULQDQ_0x01 ); 2799 DO_D( VPCLMULQDQ_0x10 ); 2800 DO_D( VPCLMULQDQ_0x11 ); 2801 DO_D( VPCLMULQDQ_0xFF ); 2802 DO_D( VCMPSS_128_0x9 ); 2803 DO_D( VMASKMOVPS_128_LoadForm ); 2804 DO_D( VMASKMOVPS_256_LoadForm ); 2805 DO_D( VMASKMOVPD_128_LoadForm ); 2806 DO_D( VMASKMOVPD_256_LoadForm ); 2807 DO_D( VMASKMOVPS_128_StoreForm ); 2808 DO_D( VMASKMOVPS_256_StoreForm ); 2809 DO_D( VMASKMOVPD_128_StoreForm ); 2810 DO_D( VMASKMOVPD_256_StoreForm ); 2811 return 0; 2812 } 2813 2814