; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+gfni,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

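; Each affine test below invokes the intrinsic three times (immediates 3, 4
; and 5) so one function covers the unmasked, zero-masked ({%k1} {z}) and
; merge-masked ({%k1}) forms; the xor chain combining the three results is
; expected to fold into a single vpternlogq ($150 = 0x96, the three-input XOR
; truth table). Unmasked xmm/ymm forms should compress from EVEX to VEX.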
declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
define <16 x i8> @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_vgf2p8affineinvqb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x03]
; X86-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xd9,0x04]
; X86-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineinvqb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x03]
; X64-NEXT:    vgf2p8affineinvqb $4, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xd9,0x04]
; X64-NEXT:    vgf2p8affineinvqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
  %3 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
  %4 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
  %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
  %7 = xor <16 x i8> %5, %6
  %8 = xor <16 x i8> %7, %2
  ret <16 x i8> %8
}

declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
define <32 x i8> @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_vgf2p8affineinvqb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x03]
; X86-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xd9,0x04]
; X86-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineinvqb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x03]
; X64-NEXT:    vgf2p8affineinvqb $4, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xd9,0x04]
; X64-NEXT:    vgf2p8affineinvqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
  %3 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
  %4 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
  %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
  %7 = xor <32 x i8> %5, %6
  %8 = xor <32 x i8> %7, %2
  ret <32 x i8> %8
}

declare <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8>, <64 x i8>, i8)
define <64 x i8> @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_vgf2p8affineinvqb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x03]
; X86-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xd9,0x04]
; X86-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineinvqb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x03]
; X64-NEXT:    vgf2p8affineinvqb $4, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xd9,0x04]
; X64-NEXT:    vgf2p8affineinvqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
  %3 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
  %4 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
  %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
  %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
  %7 = xor <64 x i8> %5, %6
  %8 = xor <64 x i8> %7, %2
  ret <64 x i8> %8
}

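; Same masking pattern for the forward affine transform, vgf2p8affineqb
; (opcode 0xce, versus 0xcf for the inverse form above).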
declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
define <16 x i8> @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_vgf2p8affineqb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x03]
; X86-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xd9,0x04]
; X86-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineqb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x03]
; X64-NEXT:    vgf2p8affineqb $4, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xd9,0x04]
; X64-NEXT:    vgf2p8affineqb $5, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3)
  %3 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 4)
  %4 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 5)
  %5 = select <16 x i1> %1, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = select <16 x i1> %1, <16 x i8> %4, <16 x i8> %passthru
  %7 = xor <16 x i8> %5, %6
  %8 = xor <16 x i8> %7, %2
  ret <16 x i8> %8
}

declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
define <32 x i8> @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_vgf2p8affineqb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x03]
; X86-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xd9,0x04]
; X86-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineqb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x03]
; X64-NEXT:    vgf2p8affineqb $4, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xd9,0x04]
; X64-NEXT:    vgf2p8affineqb $5, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3)
  %3 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 4)
  %4 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 5)
  %5 = select <32 x i1> %1, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = select <32 x i1> %1, <32 x i8> %4, <32 x i8> %passthru
  %7 = xor <32 x i8> %5, %6
  %8 = xor <32 x i8> %7, %2
  ret <32 x i8> %8
}

declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
define <64 x i8> @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_vgf2p8affineqb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x03]
; X86-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xd9,0x04]
; X86-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
; X86-NEXT:    vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8affineqb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x03]
; X64-NEXT:    vgf2p8affineqb $4, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xd9,0x04]
; X64-NEXT:    vgf2p8affineqb $5, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x05]
; X64-NEXT:    vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3)
  %3 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 4)
  %4 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 5)
  %5 = select <64 x i1> %1, <64 x i8> %3, <64 x i8> zeroinitializer
  %6 = select <64 x i1> %1, <64 x i8> %4, <64 x i8> %passthru
  %7 = xor <64 x i8> %5, %6
  %8 = xor <64 x i8> %7, %2
  ret <64 x i8> %8
}

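; vgf2p8mulb multiplies bytes in GF(2^8) reduced modulo x^8 + x^4 + x^3 + x + 1.
; Plain, merge-masked and zero-masked forms are tested separately via selects
; against %passthru and zeroinitializer.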
declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
  ret <16 x i8> %1
}

define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_vgf2p8mulb_128_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_128_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
  %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> %passthru
  ret <16 x i8> %3
}

define <16 x i8> @test_vgf2p8mulb_128_maskz(<16 x i8> %src1, <16 x i8> %src2, i16 %mask) {
; X86-LABEL: test_vgf2p8mulb_128_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_128_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2)
  %3 = select <16 x i1> %1, <16 x i8> %2, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)
define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
  ret <32 x i8> %1
}

define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_vgf2p8mulb_256_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_256_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
  %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> %passthru
  ret <32 x i8> %3
}

define <32 x i8> @test_vgf2p8mulb_256_maskz(<32 x i8> %src1, <32 x i8> %src2, i32 %mask) {
; X86-LABEL: test_vgf2p8mulb_256_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_256_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
  %3 = select <32 x i1> %1, <32 x i8> %2, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8>, <64 x i8>)
define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2) {
; CHECK-LABEL: test_vgf2p8mulb_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
  ret <64 x i8> %1
}

define <64 x i8> @test_vgf2p8mulb_512_mask(<64 x i8> %src1, <64 x i8> %src2, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_vgf2p8mulb_512_mask:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_512_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
  %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> %passthru
  ret <64 x i8> %3
}

define <64 x i8> @test_vgf2p8mulb_512_maskz(<64 x i8> %src1, <64 x i8> %src2, i64 %mask) {
; X86-LABEL: test_vgf2p8mulb_512_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_vgf2p8mulb_512_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vgf2p8mulb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2)
  %3 = select <64 x i1> %1, <64 x i8> %2, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}