#include <assert.h>   /* assert */
#include <malloc.h>   /* memalign */
#include <stdio.h>    /* printf */
#include <stdlib.h>   /* free, size_t */

typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long int       UWord;
typedef  unsigned long long int  ULong;

/* Data area whose address the gather tests take from inline asm
   (leaq randArray(%rip)); marked 'used' so it is kept even though no
   C code in view references it. */
UChar randArray[1027] __attribute__((used));

/* True iff _ptr lies on a 32-byte boundary. */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* Image of one 256-bit vector register, viewable as 32 bytes or as
   eight 32-bit words. */
typedef
   union { UChar u8[32]; UInt u32[8]; }
   YMM;

/* The state a generated test moves in and out of registers: four
   vectors (at byte offsets 0, 32, 64, 96) and one 64-bit integer (at
   offset 128).  The inline asm below relies on these offsets. */
typedef
   struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; }
   Block;

/* Print the 32 bytes of *vec in hex, most significant byte (index 31)
   first, with a '.' between each group of 8 bytes.  vec must be
   32-byte aligned. */
void showYMM ( const YMM* vec )
{
   int i;
   assert(IS_32_ALIGNED(vec));
   for (i = 31; i >= 0; i--) {
      printf("%02x", (UInt)vec->u8[i]);
      /* Separator after bytes 24, 16 and 8, but not after byte 0. */
      if (i > 0 && 0 == (i & 7))
         printf(".");
   }
}

/* Print msg on its own line, followed by the four vectors of *block
   (one per line) and then its 64-bit integer field. */
void showBlock ( const char* msg, const Block* block )
{
   printf(" %s\n", msg);
   printf(" "); showYMM(&block->a1); printf("\n");
   printf(" "); showYMM(&block->a2); printf("\n");
   printf(" "); showYMM(&block->a3); printf("\n");
   printf(" "); showYMM(&block->a4); printf("\n");
   printf(" %016llx\n", block->u64);
}

/* Return the next byte of a fixed-seed linear congruential sequence.
   The seed is static, so every run of the program produces the same
   sequence. */
UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

/* Overwrite every byte of *b with successive randUChar() values. */
void randBlock ( Block* b )
{
   size_t i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}


/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants.

   The generated function allocates a 32-byte-aligned Block and then,
   for each form: fills the block with pseudo-random bytes, prints it
   ("before"), loads it into the registers, runs the insn, stores the
   registers back, and prints it again ("after").

   Register/field mapping, reg form:  a1 <-> ymm7, a2 <-> ymm8,
   a3 <-> ymm6, a4 <-> ymm9, u64 <-> r14.
   Register/field mapping, mem form:  rax = address of a1 (the memory
   operand), a2 <-> ymm8, a3 <-> ymm7, a4 <-> ymm9, u64 <-> r14.

   Inside the insn strings, %0 is the pointer to the Block. */
#define GEN_test_RandM(_name, _reg_form, _mem_form) \
   __attribute__ ((noinline)) static void test_##_name ( void ) \
   { \
      Block* b = memalign(32, sizeof(Block)); \
      assert(b != NULL); \
      randBlock(b); \
      printf("%s(reg)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm6" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _reg_form "\n\t" \
         "vmovdqa %%ymm7, 0(%0)" "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b); \
      randBlock(b); \
      printf("%s(mem)\n", #_name); \
      showBlock("before", b); \
      __asm__ __volatile__( \
         "leaq 0(%0),%%rax" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm7" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "movq 128(%0),%%r14" "\n\t" \
         _mem_form "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm7, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         "movq %%r14, 128(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm6", \
           "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
      showBlock("after", b); \
      printf("\n"); \
      free(b); \
   }

/* Variants for insns that have only a reg form or only a mem form.
   The missing form is an empty string, so that pass still runs and
   prints, but executes only the register load/store sequence. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)


/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2.  */

GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")
/* Each invocation below instantiates one static function test_<NAME>
   through GEN_test_RandM / GEN_test_Ronly / GEN_test_Monly.  The first
   string is the register form of the instruction, the second (where
   present) the memory form, whose memory operand is addressed via
   %rax. */

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

/* Shifts by an immediate count: register form only. */
GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

/* Note: the two forms use different immediates (0x39 vs 0xC6). */
GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDB_256,
               "vpaddb %%ymm6, %%ymm8, %%ymm7",
               "vpaddb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHDQ_256,
               "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXDQ_256,
               "vpmovsxdq %%xmm6, %%ymm8",
               "vpmovsxdq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWD_256,
               "vpmovsxwd %%xmm6, %%ymm8",
               "vpmovsxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_256,
               "vpmulhw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHQDQ_256,
               "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRAW_0x05_256,
               "vpsraw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPCMPGTB_256,
               "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTW_256,
               "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTD_256,
               "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBD_256,
               "vpmovzxbd %%xmm6, %%ymm8",
               "vpmovzxbd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXBD_256,
               "vpmovsxbd %%xmm6, %%ymm8",
               "vpmovsxbd (%%rax), %%ymm8")

/* The three VPALIGNR tests between them use six different byte
   offsets (0, 3, 6, 9, 12, 15), one per form. */
GEN_test_RandM(VPALIGNR_256_1of3,
               "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPALIGNR_256_2of3,
               "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPALIGNR_256_3of3,
               "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")

/* VPBLENDW with a spread of immediates.  The test name carries the
   immediate of the register form; the memory form uses a different
   immediate. */
GEN_test_RandM(VPBLENDW_256_0x00,
               "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0xFE,
               "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x30,
               "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x21,
               "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0xD7,
               "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0xB5,
               "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x85,
               "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDW_256_0x29,
               "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")

/* Shifts by a count held in an xmm register or in memory.  The reg
   form masks r14 and moves it into xmm6 to serve as the count; the mem
   form masks the 64-bit field at 128(%rax) in place and uses it as the
   count operand. */
GEN_test_RandM(VPSLLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")

/* NOTE(review): the reg form masks with 31 while the mem form masks
   with 15, unlike the other 16-bit shifts above -- confirm whether
   this is intended. */
GEN_test_RandM(VPSRAW_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

/* VMPSADBW with a spread of immediates; both forms of each test use
   the same immediate. */
GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")

/* Instructions new in AVX2.  */

GEN_test_Monly(VBROADCASTI128,
               "vbroadcasti128 (%%rax), %%ymm9")

GEN_test_RandM(VEXTRACTI128_0x0,
               "vextracti128 $0x0, %%ymm7, %%xmm9",
               "vextracti128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTI128_0x1,
               "vextracti128 $0x1, %%ymm7, %%xmm9",
               "vextracti128 $0x1, %%ymm7, (%%rax)")

GEN_test_RandM(VINSERTI128_0x0,
               "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTI128_0x1,
               "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VPERM2I128_0x00,
               "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0xFF,
               "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x30,
               "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x21,
               "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x12,
               "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x03,
               "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x85,
               "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPERM2I128_0x5A,
               "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VBROADCASTSS_128,
               "vbroadcastss %%xmm9, %%xmm7")

GEN_test_Ronly(VBROADCASTSS_256,
               "vbroadcastss %%xmm9, %%ymm7")

GEN_test_Ronly(VBROADCASTSD_256,
               "vbroadcastsd %%xmm9, %%ymm7")

GEN_test_RandM(VPERMD,
               "vpermd %%ymm6, %%ymm7, %%ymm9",
               "vpermd (%%rax), %%ymm7, %%ymm9")

/* VPERMQ / VPERMPD / VPBLENDD with a spread of immediates.  As with
   VPBLENDW, the test name carries the register form's immediate and
   the memory form uses a different one. */
GEN_test_RandM(VPERMQ_0x00,
               "vpermq $0x00, %%ymm6, %%ymm7",
               "vpermq $0x01, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0xFE,
               "vpermq $0xFE, %%ymm6, %%ymm7",
               "vpermq $0xFF, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0x30,
               "vpermq $0x30, %%ymm6, %%ymm7",
               "vpermq $0x03, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0x21,
               "vpermq $0x21, %%ymm6, %%ymm7",
               "vpermq $0x12, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0xD7,
               "vpermq $0xD7, %%ymm6, %%ymm7",
               "vpermq $0x6C, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0xB5,
               "vpermq $0xB5, %%ymm6, %%ymm7",
               "vpermq $0x4A, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0x85,
               "vpermq $0x85, %%ymm6, %%ymm7",
               "vpermq $0xDC, (%%rax), %%ymm7")

GEN_test_RandM(VPERMQ_0x29,
               "vpermq $0x29, %%ymm6, %%ymm7",
               "vpermq $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPS,
               "vpermps %%ymm6, %%ymm7, %%ymm9",
               "vpermps (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMPD_0x00,
               "vpermpd $0x00, %%ymm6, %%ymm7",
               "vpermpd $0x01, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0xFE,
               "vpermpd $0xFE, %%ymm6, %%ymm7",
               "vpermpd $0xFF, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0x30,
               "vpermpd $0x30, %%ymm6, %%ymm7",
               "vpermpd $0x03, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0x21,
               "vpermpd $0x21, %%ymm6, %%ymm7",
               "vpermpd $0x12, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0xD7,
               "vpermpd $0xD7, %%ymm6, %%ymm7",
               "vpermpd $0x6C, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0xB5,
               "vpermpd $0xB5, %%ymm6, %%ymm7",
               "vpermpd $0x4A, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0x85,
               "vpermpd $0x85, %%ymm6, %%ymm7",
               "vpermpd $0xDC, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPD_0x29,
               "vpermpd $0x29, %%ymm6, %%ymm7",
               "vpermpd $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPBLENDD_128_0x00,
               "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x02,
               "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x04,
               "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x06,
               "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x08,
               "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
/* Remaining VPBLENDD immediates.  The test name carries the register
   form's immediate; the memory form uses a different one. */
GEN_test_RandM(VPBLENDD_128_0x0A,
               "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x0C,
               "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_128_0x0E,
               "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPBLENDD_256_0x00,
               "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0xFE,
               "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0x30,
               "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0x21,
               "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0xD7,
               "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0xB5,
               "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0x85,
               "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBLENDD_256_0x29,
               "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")

/* Per-element variable shifts.  The reg form first reduces every
   count element in xmm6/ymm6 to its low 5 bits (dword shifts) or low
   6 bits (qword shifts) with a shift-left / shift-right pair.  The mem
   form masks selected dwords of the count vector in memory with andl.
   NOTE(review): the mem forms leave the dword at offset 12 (and 28 for
   256-bit) unmasked, presumably so that one out-of-range count is also
   exercised -- confirm. */
GEN_test_RandM(VPSLLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsllvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsllvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsllvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsllvd (%%rax), %%ymm8, %%ymm7")

/* NOTE(review): andl masks only the low 32 bits of the 64-bit count
   at (%rax); the upper 32 bits keep their random value. */
GEN_test_RandM(VPSLLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsllvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsllvq (%%rax), %%xmm8, %%xmm7")
/* Per-element variable shifts (continued).  The reg form reduces each
   count element in xmm6/ymm6 to its low 5 bits (dword) or low 6 bits
   (qword) with a shift-left / shift-right pair; the mem form masks
   selected dwords of the in-memory count vector with andl.
   NOTE(review): for the qword forms andl touches only the low 32 bits
   of each masked 64-bit count, and not every element is masked --
   presumably deliberate, to exercise out-of-range counts; confirm. */
GEN_test_RandM(VPSLLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsllvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsllvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsrlvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsrlvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsrlvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsrlvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRAVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsravd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsravd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRAVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsravd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsravd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPBROADCASTB_128,
               "vpbroadcastb %%xmm9, %%xmm7",
               "vpbroadcastb (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTB_256,
               "vpbroadcastb %%xmm9, %%ymm7",
               "vpbroadcastb (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTW_128,
               "vpbroadcastw %%xmm9, %%xmm7",
               "vpbroadcastw (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTW_256,
               "vpbroadcastw %%xmm9, %%ymm7",
               "vpbroadcastw (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTD_128,
               "vpbroadcastd %%xmm9, %%xmm7",
               "vpbroadcastd (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTD_256,
               "vpbroadcastd %%xmm9, %%ymm7",
               "vpbroadcastd (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTQ_128,
               "vpbroadcastq %%xmm9, %%xmm7",
               "vpbroadcastq (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTQ_256,
               "vpbroadcastq %%xmm9, %%ymm7",
               "vpbroadcastq (%%rax), %%ymm7")

/* Masked loads and stores.  Each test issues the instruction twice:
   once at (%rax) with the mask taken from a register loaded from the
   block, and once with xmm6/ymm6 zeroed as the mask and the address
   (%rax,%rax,4), i.e. 5 * rax.
   NOTE(review): presumably the second access checks that an all-zero
   mask suppresses any access to that (likely invalid) address --
   confirm. */
GEN_test_Monly(VPMASKMOVD_128_LoadForm,
               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVD_256_LoadForm,
               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVD_128_StoreForm,
               "vpmaskmovd %%xmm8, %%xmm7, (%%rax);"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVD_256_StoreForm,
               "vpmaskmovd %%ymm8, %%ymm7, (%%rax);"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_128_StoreForm,
               "vpmaskmovq %%xmm8, %%xmm7, (%%rax);"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_256_StoreForm,
               "vpmaskmovq %%ymm8, %%ymm7, (%%rax);"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)")

/* Gather tests.  Common shape, as visible in the strings below:
     - ymm8 receives the index vector: each element of ymm7 reduced to
       a small non-negative value by a shift-left / shift-right pair,
       then blended with the original ymm7 under a mask derived from
       ymm6;
     - ymm6 is the gather mask and ymm9 the destination;
     - r14 holds the address of randArray as the base and is zeroed
       afterwards (presumably so that no address ends up in the
       printed block -- confirm);
     - where ymm9 or ymm7 is used as a temporary, it is reloaded from
       the block via %0 (the Block pointer; offsets 96 and 0
       respectively).
   The _2 variants add the base into the index vector and gather with
   no base register, then subtract the base out of ymm8 again. */
GEN_test_Ronly(VGATHERDPS_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPS_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPS_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERDPD_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")
/* Gather tests (floating-point qword/double forms, then the integer
   VPGATHER* forms).  Common shape, as visible in the strings below:
     - ymm8 receives the index vector: each element of ymm7 reduced to
       a small non-negative value by a shift-left / shift-right pair,
       then blended with the original ymm7 under a mask derived from
       ymm6;
     - ymm6 is the gather mask and ymm9 the destination;
     - r14 holds the address of randArray as the base and is zeroed
       afterwards (presumably so that no address ends up in the
       printed block -- confirm);
     - where ymm9 or ymm7 is used as a temporary, it is reloaded from
       the block via %0 (the Block pointer; offsets 96 and 0
       respectively).
   The _2 variants add the base into the index vector and gather with
   no base register, then subtract the base out of ymm8 again. */
GEN_test_Ronly(VGATHERDPD_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VGATHERQPD_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_128,
               "vpslld $25, %%xmm7, %%xmm8;"
               "vpsrld $25, %%xmm8, %%xmm8;"
               "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDD_256,
               "vpslld $25, %%ymm7, %%ymm8;"
               "vpsrld $25, %%ymm8, %%ymm8;"
               "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_1,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_1,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_128_2,
               "vpsllq $57, %%xmm7, %%xmm8;"
               "vpsrlq $57, %%xmm8, %%xmm8;"
               "vpmovsxdq %%xmm6, %%xmm9;"
               "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQD_256_2,
               "vpsllq $57, %%ymm7, %%ymm8;"
               "vpsrlq $57, %%ymm8, %%ymm8;"
               "vpmovsxdq %%xmm6, %%ymm9;"
               "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_128,
               "vpslld $26, %%xmm7, %%xmm8;"
               "vpsrld $26, %%xmm8, %%xmm8;"
               "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
               "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERDQ_256,
               "vpslld $26, %%ymm7, %%ymm8;"
               "vpsrld $26, %%ymm8, %%ymm8;"
               "vextracti128 $1, %%ymm6, %%xmm9;"
               "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
               "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
               "vmovdqa 96(%0), %%ymm9;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_1,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_1,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_128_2,
               "vpsllq $58, %%xmm7, %%xmm8;"
               "vpsrlq $58, %%xmm8, %%xmm8;"
               "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%xmm8, %%xmm8;"
               "vpbroadcastq %%xmm7, %%xmm7;"
               "vpaddq %%xmm7, %%xmm8, %%xmm8;"
               "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
               "vpsubq %%xmm7, %%xmm8, %%xmm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

GEN_test_Ronly(VPGATHERQQ_256_2,
               "vpsllq $58, %%ymm7, %%ymm8;"
               "vpsrlq $58, %%ymm8, %%ymm8;"
               "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
               "leaq randArray(%%rip), %%r14;"
               "vmovq %%r14, %%xmm7;"
               "vpsllq $2, %%ymm8, %%ymm8;"
               "vpbroadcastq %%xmm7, %%ymm7;"
               "vpaddq %%ymm7, %%ymm8, %%ymm8;"
               "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
               "vpsubq %%ymm7, %%ymm8, %%ymm8;"
               "vmovdqa 0(%0), %%ymm7;"
               "xorl %%r14d, %%r14d")

/* Comment duplicated above, for convenient reference:
   Allowed operands in test insns:
     Reg form:  %ymm6,  %ymm7, %ymm8, %ymm9 and %r14.
     Mem form:  (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   Imm8 etc fields are also allowed, where they make sense.
   Both forms may use ymm0 as scratch.  Mem form may also use
   ymm6 as scratch.
*/ #define N_DEFAULT_ITERS 3 // Do the specified test some number of times #define DO_N(_iters, _testfn) \ do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0) // Do the specified test the default number of times #define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn) int main ( void ) { DO_D( VPOR_256 ); DO_D( VPXOR_256 ); DO_D( VPSUBB_256 ); DO_D( VPSUBD_256 ); DO_D( VPADDD_256 ); DO_D( VPMOVZXWD_256 ); DO_D( VPMOVZXBW_256 ); DO_D( VPBLENDVB_256 ); DO_D( VPMINSD_256 ); DO_D( VPMAXSD_256 ); DO_D( VPSHUFB_256 ); DO_D( VPUNPCKLBW_256 ); DO_D( VPUNPCKHBW_256 ); DO_D( VPABSD_256 ); DO_D( VPACKUSWB_256 ); DO_D( VPMOVMSKB_256 ); DO_D( VPAND_256 ); DO_D( VPCMPEQB_256 ); DO_D( VPSHUFLW_0x39_256 ); DO_D( VPSHUFHW_0x39_256 ); DO_D( VPMULLW_256 ); DO_D( VPADDUSW_256 ); DO_D( VPMULHUW_256 ); DO_D( VPADDUSB_256 ); DO_D( VPUNPCKLWD_256 ); DO_D( VPUNPCKHWD_256 ); DO_D( VPSLLD_0x05_256 ); DO_D( VPSRLD_0x05_256 ); DO_D( VPSRAD_0x05_256 ); DO_D( VPSUBUSB_256 ); DO_D( VPSUBSB_256 ); DO_D( VPSRLDQ_0x05_256 ); DO_D( VPSLLDQ_0x05_256 ); DO_D( VPANDN_256 ); DO_D( VPUNPCKLQDQ_256 ); DO_D( VPSRLW_0x05_256 ); DO_D( VPSLLW_0x05_256 ); DO_D( VPADDW_256 ); DO_D( VPACKSSDW_256 ); DO_D( VPUNPCKLDQ_256 ); DO_D( VPCMPEQD_256 ); DO_D( VPSHUFD_0x39_256 ); DO_D( VPADDQ_256 ); DO_D( VPSUBQ_256 ); DO_D( VPSUBW_256 ); DO_D( VPCMPEQQ_256 ); DO_D( VPCMPGTQ_256 ); DO_D( VPSRLQ_0x05_256 ); DO_D( VPMULUDQ_256 ); DO_D( VPMULDQ_256 ); DO_D( VPSLLQ_0x05_256 ); DO_D( VPMAXUD_256 ); DO_D( VPMINUD_256 ); DO_D( VPMULLD_256 ); DO_D( VPMAXUW_256 ); DO_D( VPMINUW_256 ); DO_D( VPMAXSW_256 ); DO_D( VPMINSW_256 ); DO_D( VPMAXUB_256 ); DO_D( VPMINUB_256 ); DO_D( VPMAXSB_256 ); DO_D( VPMINSB_256 ); DO_D( VPMOVSXBW_256 ); DO_D( VPSUBUSW_256 ); DO_D( VPSUBSW_256 ); DO_D( VPCMPEQW_256 ); DO_D( VPADDB_256 ); DO_D( VPUNPCKHDQ_256 ); DO_D( VPMOVSXDQ_256 ); DO_D( VPMOVSXWD_256 ); DO_D( VPMULHW_256 ); DO_D( VPUNPCKHQDQ_256 ); DO_D( VPSRAW_0x05_256 ); DO_D( VPCMPGTB_256 ); DO_D( VPCMPGTW_256 ); DO_D( 
VPCMPGTD_256 ); DO_D( VPMOVZXBD_256 ); DO_D( VPMOVSXBD_256 ); DO_D( VPALIGNR_256_1of3 ); DO_D( VPALIGNR_256_2of3 ); DO_D( VPALIGNR_256_3of3 ); DO_D( VPBLENDW_256_0x00 ); DO_D( VPBLENDW_256_0xFE ); DO_D( VPBLENDW_256_0x30 ); DO_D( VPBLENDW_256_0x21 ); DO_D( VPBLENDW_256_0xD7 ); DO_D( VPBLENDW_256_0xB5 ); DO_D( VPBLENDW_256_0x85 ); DO_D( VPBLENDW_256_0x29 ); DO_D( VPSLLW_256 ); DO_D( VPSRLW_256 ); DO_D( VPSRAW_256 ); DO_D( VPSLLD_256 ); DO_D( VPSRLD_256 ); DO_D( VPSRAD_256 ); DO_D( VPSLLQ_256 ); DO_D( VPSRLQ_256 ); DO_D( VPMADDWD_256 ); DO_D( VMOVNTDQA_256 ); DO_D( VPACKSSWB_256 ); DO_D( VPAVGB_256 ); DO_D( VPAVGW_256 ); DO_D( VPADDSB_256 ); DO_D( VPADDSW_256 ); DO_D( VPHADDW_256 ); DO_D( VPHADDD_256 ); DO_D( VPHADDSW_256 ); DO_D( VPMADDUBSW_256 ); DO_D( VPHSUBW_256 ); DO_D( VPHSUBD_256 ); DO_D( VPHSUBSW_256 ); DO_D( VPABSB_256 ); DO_D( VPABSW_256 ); DO_D( VPMOVSXBQ_256 ); DO_D( VPMOVSXWQ_256 ); DO_D( VPACKUSDW_256 ); DO_D( VPMOVZXBQ_256 ); DO_D( VPMOVZXWQ_256 ); DO_D( VPMOVZXDQ_256 ); DO_D( VMPSADBW_256_0x0 ); DO_D( VMPSADBW_256_0x39 ); DO_D( VMPSADBW_256_0x32 ); DO_D( VMPSADBW_256_0x2b ); DO_D( VMPSADBW_256_0x24 ); DO_D( VMPSADBW_256_0x1d ); DO_D( VMPSADBW_256_0x16 ); DO_D( VMPSADBW_256_0x0f ); DO_D( VPSADBW_256 ); DO_D( VPSIGNB_256 ); DO_D( VPSIGNW_256 ); DO_D( VPSIGND_256 ); DO_D( VPMULHRSW_256 ); DO_D( VBROADCASTI128 ); DO_D( VEXTRACTI128_0x0 ); DO_D( VEXTRACTI128_0x1 ); DO_D( VINSERTI128_0x0 ); DO_D( VINSERTI128_0x1 ); DO_D( VPERM2I128_0x00 ); DO_D( VPERM2I128_0xFF ); DO_D( VPERM2I128_0x30 ); DO_D( VPERM2I128_0x21 ); DO_D( VPERM2I128_0x12 ); DO_D( VPERM2I128_0x03 ); DO_D( VPERM2I128_0x85 ); DO_D( VPERM2I128_0x5A ); DO_D( VBROADCASTSS_128 ); DO_D( VBROADCASTSS_256 ); DO_D( VBROADCASTSD_256 ); DO_D( VPERMD ); DO_D( VPERMQ_0x00 ); DO_D( VPERMQ_0xFE ); DO_D( VPERMQ_0x30 ); DO_D( VPERMQ_0x21 ); DO_D( VPERMQ_0xD7 ); DO_D( VPERMQ_0xB5 ); DO_D( VPERMQ_0x85 ); DO_D( VPERMQ_0x29 ); DO_D( VPERMPS ); DO_D( VPERMPD_0x00 ); DO_D( VPERMPD_0xFE ); DO_D( VPERMPD_0x30 ); DO_D( 
VPERMPD_0x21 ); DO_D( VPERMPD_0xD7 ); DO_D( VPERMPD_0xB5 ); DO_D( VPERMPD_0x85 ); DO_D( VPERMPD_0x29 ); DO_D( VPBLENDD_128_0x00 ); DO_D( VPBLENDD_128_0x02 ); DO_D( VPBLENDD_128_0x04 ); DO_D( VPBLENDD_128_0x06 ); DO_D( VPBLENDD_128_0x08 ); DO_D( VPBLENDD_128_0x0A ); DO_D( VPBLENDD_128_0x0C ); DO_D( VPBLENDD_128_0x0E ); DO_D( VPBLENDD_256_0x00 ); DO_D( VPBLENDD_256_0xFE ); DO_D( VPBLENDD_256_0x30 ); DO_D( VPBLENDD_256_0x21 ); DO_D( VPBLENDD_256_0xD7 ); DO_D( VPBLENDD_256_0xB5 ); DO_D( VPBLENDD_256_0x85 ); DO_D( VPBLENDD_256_0x29 ); DO_D( VPSLLVD_128 ); DO_D( VPSLLVD_256 ); DO_D( VPSLLVQ_128 ); DO_D( VPSLLVQ_256 ); DO_D( VPSRLVD_128 ); DO_D( VPSRLVD_256 ); DO_D( VPSRLVQ_128 ); DO_D( VPSRLVQ_256 ); DO_D( VPSRAVD_128 ); DO_D( VPSRAVD_256 ); DO_D( VPBROADCASTB_128 ); DO_D( VPBROADCASTB_256 ); DO_D( VPBROADCASTW_128 ); DO_D( VPBROADCASTW_256 ); DO_D( VPBROADCASTD_128 ); DO_D( VPBROADCASTD_256 ); DO_D( VPBROADCASTQ_128 ); DO_D( VPBROADCASTQ_256 ); DO_D( VPMASKMOVD_128_LoadForm ); DO_D( VPMASKMOVD_256_LoadForm ); DO_D( VPMASKMOVQ_128_LoadForm ); DO_D( VPMASKMOVQ_256_LoadForm ); DO_D( VPMASKMOVD_128_StoreForm ); DO_D( VPMASKMOVD_256_StoreForm ); DO_D( VPMASKMOVQ_128_StoreForm ); DO_D( VPMASKMOVQ_256_StoreForm ); { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); } DO_D( VGATHERDPS_128 ); DO_D( VGATHERDPS_256 ); DO_D( VGATHERQPS_128_1 ); DO_D( VGATHERQPS_256_1 ); DO_D( VGATHERQPS_128_2 ); DO_D( VGATHERQPS_256_2 ); DO_D( VGATHERDPD_128 ); DO_D( VGATHERDPD_256 ); DO_D( VGATHERQPD_128_1 ); DO_D( VGATHERQPD_256_1 ); DO_D( VGATHERQPD_128_2 ); DO_D( VGATHERQPD_256_2 ); DO_D( VPGATHERDD_128 ); DO_D( VPGATHERDD_256 ); DO_D( VPGATHERQD_128_1 ); DO_D( VPGATHERQD_256_1 ); DO_D( VPGATHERQD_128_2 ); DO_D( VPGATHERQD_256_2 ); DO_D( VPGATHERDQ_128 ); DO_D( VPGATHERDQ_256 ); DO_D( VPGATHERQQ_128_1 ); DO_D( VPGATHERQQ_256_1 ); DO_D( VPGATHERQQ_128_2 ); DO_D( VPGATHERQQ_256_2 ); return 0; }