1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) 6 7define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { 8; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: 9; X86: # %bb.0: 10; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04] 11; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 12; X86-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1] 13; X86-NEXT: vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1] 14; X86-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] 15; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 16; X86-NEXT: retl # encoding: [0xc3] 17; 18; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: 19; X64: # %bb.0: 20; X64-NEXT: vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf] 21; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 22; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] 23; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7] 24; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] 25; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 26; X64-NEXT: retq # encoding: [0xc3] 27 %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) 28 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) 29 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask) 30 %res3 = add <16 x i8> %res, %res1 31 %res4 = add <16 x i8> %res2, %res3 32 ret <16 x i8> %res4 33} 34 35 36declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8) 37 38define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { 39; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: 40; X86: # %bb.0: 41; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x4c,0x24,0x04] 42; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 43; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 44; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] 45; X86-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1] 46; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 47; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 48; X86-NEXT: retl # encoding: [0xc3] 49; 50; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: 51; X64: # %bb.0: 52; X64-NEXT: vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf] 53; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 54; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} # encoding: 
[0x62,0xf2,0x7d,0x09,0x7b,0xc7] 55; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7] 56; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] 57; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 58; X64-NEXT: retq # encoding: [0xc3] 59 %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) 60 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) 61 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask) 62 %res3 = add <8 x i16> %res, %res1 63 %res4 = add <8 x i16> %res2, %res3 64 ret <8 x i16> %res4 65} 66 67 68declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32) 69 70define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { 71; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: 72; X86: # %bb.0: 73; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04] 74; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 75; X86-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1] 76; X86-NEXT: vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1] 77; X86-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] 78; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 79; X86-NEXT: retl # encoding: [0xc3] 80; 81; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: 82; X64: # %bb.0: 83; X64-NEXT: vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf] 84; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 85; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] 86; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7] 87; X64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] 88; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 89; X64-NEXT: retq # encoding: [0xc3] 90 %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) 91 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) 92 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask) 93 %res3 = add <32 x i8> %res, %res1 94 %res4 = add <32 x i8> %res2, %res3 95 ret <32 x i8> %res4 96} 97 98 99 100declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16) 101 102define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { 103; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: 104; X86: # %bb.0: 105; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04] 106; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 107; X86-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1] 108; X86-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1] 109; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 110; X86-NEXT: vpaddw 
%ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 111; X86-NEXT: retl # encoding: [0xc3] 112; 113; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: 114; X64: # %bb.0: 115; X64-NEXT: vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf] 116; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 117; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] 118; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7] 119; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] 120; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 121; X64-NEXT: retq # encoding: [0xc3] 122 %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) 123 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) 124 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask) 125 %res3 = add <16 x i16> %res, %res1 126 %res4 = add <16 x i16> %res2, %res3 127 ret <16 x i16> %res4 128} 129 130declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) 131 132define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1) { 133; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256: 134; CHECK: # %bb.0: 135; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xc0] 136; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 137 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) 138 ret <32 x i8> %res 139} 140 141define <32 x i8>@test_int_x86_avx512_mask_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) { 142; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_256: 143; X86: # %bb.0: 144; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 145; X86-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] 146; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 147; X86-NEXT: retl # encoding: [0xc3] 148; 149; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_256: 150; X64: # %bb.0: 151; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 152; X64-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] 153; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 154; X64-NEXT: retq # encoding: [0xc3] 155 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) 156 ret <32 x i8> %res 157} 158 159define <32 x i8>@test_int_x86_avx512_maskz_pbroadcastb_256(<16 x i8> %x0, i32 %mask) { 160; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256: 161; X86: # %bb.0: 162; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 163; X86-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] 164; X86-NEXT: retl # encoding: [0xc3] 165; 166; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256: 167; X64: # %bb.0: 168; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 169; X64-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] 170; X64-NEXT: retq # encoding: [0xc3] 171 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask) 172 ret <32 x 
i8> %res 173} 174 175declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16) 176 177define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1) { 178; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128: 179; CHECK: # %bb.0: 180; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xc0] 181; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 182 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 183 ret <16 x i8> %res 184} 185 186define <16 x i8>@test_int_x86_avx512_mask_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 187; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_128: 188; X86: # %bb.0: 189; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 190; X86-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] 191; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 192; X86-NEXT: retl # encoding: [0xc3] 193; 194; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_128: 195; X64: # %bb.0: 196; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 197; X64-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] 198; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 199; X64-NEXT: retq # encoding: [0xc3] 200 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) 201 ret <16 x i8> %res 202} 203 204define <16 x i8>@test_int_x86_avx512_maskz_pbroadcastb_128(<16 x i8> %x0, i16 %mask) { 205; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128: 206; X86: # %bb.0: 207; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 208; X86-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] 209; X86-NEXT: retl # encoding: [0xc3] 210; 211; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128: 212; X64: # %bb.0: 213; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 214; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] 215; X64-NEXT: retq # encoding: [0xc3] 216 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask) 217 ret <16 x i8> %res 218} 219 220declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16) 221 222define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1) { 223; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256: 224; CHECK: # %bb.0: 225; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xc0] 226; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 227 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) 228 ret <16 x i16> %res 229} 230 231define <16 x i16>@test_int_x86_avx512_mask_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) { 232; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_256: 233; X86: # %bb.0: 234; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 235; X86-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8] 236; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 237; X86-NEXT: retl # encoding: [0xc3] 238; 239; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_256: 240; X64: # %bb.0: 241; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 242; 
X64-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8] 243; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 244; X64-NEXT: retq # encoding: [0xc3] 245 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) 246 ret <16 x i16> %res 247} 248 249define <16 x i16>@test_int_x86_avx512_maskz_pbroadcastw_256(<8 x i16> %x0, i16 %mask) { 250; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256: 251; X86: # %bb.0: 252; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 253; X86-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] 254; X86-NEXT: retl # encoding: [0xc3] 255; 256; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256: 257; X64: # %bb.0: 258; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 259; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] 260; X64-NEXT: retq # encoding: [0xc3] 261 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask) 262 ret <16 x i16> %res 263} 264 265declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8) 266 267define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1) { 268; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128: 269; CHECK: # %bb.0: 270; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xc0] 271; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 272 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 273 ret <8 x i16> %res 274} 275 276define <8 x i16>@test_int_x86_avx512_mask_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) { 277; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_128: 278; X86: # %bb.0: 279; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 280; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 281; X86-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] 282; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 283; X86-NEXT: retl # encoding: [0xc3] 284; 285; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_128: 286; X64: # %bb.0: 287; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 288; X64-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] 289; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 290; X64-NEXT: retq # encoding: [0xc3] 291 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) 292 ret <8 x i16> %res 293} 294 295define <8 x i16>@test_int_x86_avx512_maskz_pbroadcastw_128(<8 x i16> %x0, i8 %mask) { 296; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128: 297; X86: # %bb.0: 298; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 299; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 300; X86-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] 301; X86-NEXT: retl # encoding: [0xc3] 302; 303; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128: 304; X64: # %bb.0: 305; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 306; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] 307; X64-NEXT: retq # encoding: [0xc3] 308 %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x 
i16> %x0, <8 x i16> zeroinitializer, i8 %mask) 309 ret <8 x i16> %res 310} 311 312declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64) 313 314define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1) { 315; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512: 316; CHECK: # %bb.0: 317; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0] 318; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 319 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1) 320 ret <64 x i8> %res 321} 322 323define <64 x i8>@test_int_x86_avx512_mask_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) { 324; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_512: 325; X86: # %bb.0: 326; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 327; X86-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] 328; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 329; X86-NEXT: retl # encoding: [0xc3] 330; 331; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_512: 332; X64: # %bb.0: 333; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 334; X64-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] 335; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 336; X64-NEXT: retq # encoding: [0xc3] 337 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) 338 ret <64 x i8> %res 339} 340 341define <64 x i8>@test_int_x86_avx512_maskz_pbroadcastb_512(<16 x i8> %x0, i64 %mask) { 342; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512: 343; X86: # %bb.0: 344; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 345; X86-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0] 346; X86-NEXT: retl # encoding: [0xc3] 347; 348; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512: 349; X64: # %bb.0: 350; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 351; X64-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0] 352; X64-NEXT: retq # encoding: [0xc3] 353 %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask) 354 ret <64 x i8> %res 355} 356 357declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32) 358 359define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1) { 360; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512: 361; CHECK: # %bb.0: 362; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0] 363; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 364 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1) 365 ret <32 x i16> %res 366} 367 368define <32 x i16>@test_int_x86_avx512_mask_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) { 369; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_512: 370; X86: # %bb.0: 371; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 372; X86-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] 373; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 374; X86-NEXT: retl # encoding: [0xc3] 375; 376; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_512: 377; X64: # %bb.0: 378; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 
379; X64-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] 380; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 381; X64-NEXT: retq # encoding: [0xc3] 382 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) 383 ret <32 x i16> %res 384} 385 386define <32 x i16>@test_int_x86_avx512_maskz_pbroadcastw_512(<8 x i16> %x0, i32 %mask) { 387; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512: 388; X86: # %bb.0: 389; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 390; X86-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0] 391; X86-NEXT: retl # encoding: [0xc3] 392; 393; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512: 394; X64: # %bb.0: 395; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 396; X64-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0] 397; X64-NEXT: retq # encoding: [0xc3] 398 %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask) 399 ret <32 x i16> %res 400} 401 402declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16) 403 404define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) { 405; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128: 406; X86: # %bb.0: 407; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 408; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 409; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 410; X86-NEXT: vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01] 411; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 412; X86-NEXT: retl # encoding: [0xc3] 413; 414; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128: 415; X64: # %bb.0: 416; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 417; X64-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07] 418; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 419; X64-NEXT: retq # encoding: [0xc3] 420 call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2) 421 call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1) 422 ret void 423} 424 425declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32) 426 427define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) { 428; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256: 429; X86: # %bb.0: 430; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 431; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 432; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 433; X86-NEXT: vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01] 434; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 435; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 436; X86-NEXT: retl # encoding: [0xc3] 437; 438; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256: 439; X64: # %bb.0: 440; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 441; X64-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] 442; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 443; 
X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 444; X64-NEXT: retq # encoding: [0xc3] 445 call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) 446 call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) 447 ret void 448} 449 450declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8) 451 452define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) { 453; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128: 454; X86: # %bb.0: 455; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 456; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 457; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 458; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 459; X86-NEXT: vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01] 460; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 461; X86-NEXT: retl # encoding: [0xc3] 462; 463; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128: 464; X64: # %bb.0: 465; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 466; X64-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07] 467; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 468; X64-NEXT: retq # encoding: [0xc3] 469 call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2) 470 call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1) 471 ret void 472} 473 474declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16) 475 476define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) { 477; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256: 478; X86: # %bb.0: 479; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 480; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 481; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 482; X86-NEXT: vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01] 483; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 484; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 485; X86-NEXT: retl # encoding: [0xc3] 486; 487; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256: 488; X64: # %bb.0: 489; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 490; X64-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] 491; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 492; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 493; X64-NEXT: retq # encoding: [0xc3] 494 call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) 495 call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) 496 ret void 497} 498 499declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8) 500 501define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { 502; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128: 503; X86: # %bb.0: 504; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 505; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 506; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 507; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: 
[0x0f,0xb6,0x54,0x24,0x0c] 508; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 509; X86-NEXT: vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00] 510; X86-NEXT: vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09] 511; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 512; X86-NEXT: retl # encoding: [0xc3] 513; 514; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128: 515; X64: # %bb.0: 516; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 517; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 518; X64-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06] 519; X64-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f] 520; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 521; X64-NEXT: retq # encoding: [0xc3] 522 %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1) 523 %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) 524 %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) 525 %res2 = add <8 x i16> %res, %res1 526 ret <8 x i16> %res2 527} 528 529declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16) 530 531define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { 532; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256: 533; X86: # %bb.0: 534; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 535; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 536; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 537; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 538; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00] 539; X86-NEXT: vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09] 540; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 541; X86-NEXT: retl # encoding: [0xc3] 542; 543; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256: 544; X64: # %bb.0: 545; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 546; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 547; X64-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06] 548; X64-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f] 549; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 550; X64-NEXT: retq # encoding: [0xc3] 551 %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1) 552 %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) 553 %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) 554 %res2 = add <16 x i16> %res, %res1 555 ret <16 x i16> %res2 556} 557 558declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16) 559 560define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { 561; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128: 562; X86: # %bb.0: 563; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 564; X86-NEXT: 
movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 565; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] 566; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] 567; X86-NEXT: vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00] 568; X86-NEXT: vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09] 569; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 570; X86-NEXT: retl # encoding: [0xc3] 571; 572; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128: 573; X64: # %bb.0: 574; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] 575; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 576; X64-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06] 577; X64-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f] 578; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 579; X64-NEXT: retq # encoding: [0xc3] 580 %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1) 581 %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) 582 %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) 583 %res2 = add <16 x i8> %res, %res1 584 ret <16 x i8> %res2 585} 586 587declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32) 588 589define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { 590; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256: 591; X86: # %bb.0: 592; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 593; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 594; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] 595; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 596; X86-NEXT: vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00] 597; X86-NEXT: vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09] 598; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] 599; X86-NEXT: retl # encoding: [0xc3] 600; 601; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256: 602; X64: # %bb.0: 603; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] 604; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 605; X64-NEXT: vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06] 606; X64-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f] 607; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] 608; X64-NEXT: retq # encoding: [0xc3] 609 %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1) 610 %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) 611 %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) 612 %res2 = add <32 x i8> %res, %res1 613 ret <32 x i8> %res2 614} 615 616declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16) 617 618define <16 x i8>@test_int_x86_avx512_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3) { 619; 
CHECK-LABEL: test_int_x86_avx512_palignr_128: 620; CHECK: # %bb.0: 621; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x02] 622; CHECK-NEXT: # xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 623; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 624 %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1) 625 ret <16 x i8> %res 626} 627 628define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) { 629; X86-LABEL: test_int_x86_avx512_mask_palignr_128: 630; X86: # %bb.0: 631; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 632; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02] 633; X86-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 634; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 635; X86-NEXT: retl # encoding: [0xc3] 636; 637; X64-LABEL: test_int_x86_avx512_mask_palignr_128: 638; X64: # %bb.0: 639; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 640; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02] 641; X64-NEXT: # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 642; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 643; X64-NEXT: retq # encoding: [0xc3] 644 %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4) 645 ret <16 x i8> %res 646} 647 648define <16 x i8>@test_int_x86_avx512_maskz_palignr_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x4) { 649; X86-LABEL: test_int_x86_avx512_maskz_palignr_128: 650; X86: # %bb.0: 651; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 652; X86-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] 653; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 654; X86-NEXT: retl # encoding: [0xc3] 655; 656; X64-LABEL: test_int_x86_avx512_maskz_palignr_128: 657; X64: # %bb.0: 658; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 659; X64-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] 660; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 661; X64-NEXT: retq # encoding: [0xc3] 662 %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4) 663 ret <16 x i8> %res 664} 665 666declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32) 667 668define <32 x i8>@test_int_x86_avx512_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3) { 669; CHECK-LABEL: test_int_x86_avx512_palignr_256: 670; CHECK: # %bb.0: 671; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xc1,0x02] 672; CHECK-NEXT: # ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 673; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 674 %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1) 675 ret <32 x i8> %res 676} 677 678define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) { 679; X86-LABEL: 
test_int_x86_avx512_mask_palignr_256: 680; X86: # %bb.0: 681; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 682; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02] 683; X86-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 684; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 685; X86-NEXT: retl # encoding: [0xc3] 686; 687; X64-LABEL: test_int_x86_avx512_mask_palignr_256: 688; X64: # %bb.0: 689; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 690; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02] 691; X64-NEXT: # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 692; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 693; X64-NEXT: retq # encoding: [0xc3] 694 %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4) 695 ret <32 x i8> %res 696} 697 698define <32 x i8>@test_int_x86_avx512_maskz_palignr_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x4) { 699; X86-LABEL: test_int_x86_avx512_maskz_palignr_256: 700; X86: # %bb.0: 701; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 702; X86-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] 703; X86-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 704; X86-NEXT: retl # encoding: [0xc3] 705; 706; X64-LABEL: test_int_x86_avx512_maskz_palignr_256: 707; X64: # %bb.0: 708; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 709; X64-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] 710; X64-NEXT: # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 711; X64-NEXT: retq # encoding: [0xc3] 712 %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4) 713 ret <32 x i8> %res 714} 715 716declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8) 717 718define <8 x i16>@test_int_x86_avx512_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) { 719; CHECK-LABEL: test_int_x86_avx512_pshufh_w_128: 720; CHECK: # %bb.0: 721; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x03] 722; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,7,4,4,4] 723; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 724 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 725 ret <8 x i16> %res 726} 727 728define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 729; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128: 730; X86: # %bb.0: 731; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 732; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 733; X86-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03] 734; X86-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] 735; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 736; X86-NEXT: retl # encoding: 
[0xc3] 737; 738; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128: 739; X64: # %bb.0: 740; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 741; X64-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03] 742; X64-NEXT: # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] 743; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 744; X64-NEXT: retq # encoding: [0xc3] 745 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 746 ret <8 x i16> %res 747} 748 749define <8 x i16>@test_int_x86_avx512_maskz_pshufh_w_128(<8 x i16> %x0, i8 %x3) { 750; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_128: 751; X86: # %bb.0: 752; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 753; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 754; X86-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] 755; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] 756; X86-NEXT: retl # encoding: [0xc3] 757; 758; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_128: 759; X64: # %bb.0: 760; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 761; X64-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] 762; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] 763; X64-NEXT: retq # encoding: [0xc3] 764 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 765 ret <8 x i16> %res 766} 767 768declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16) 769 770define <16 x i16>@test_int_x86_avx512_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2) { 771; CHECK-LABEL: test_int_x86_avx512_pshufh_w_256: 772; CHECK: # %bb.0: 773; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xc0,0x03] 774; CHECK-NEXT: # ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 775; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 776 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 777 ret <16 x i16> %res 778} 779 780define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 781; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256: 782; X86: # %bb.0: 783; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 784; X86-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03] 785; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 786; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 787; X86-NEXT: retl # encoding: [0xc3] 788; 789; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256: 790; X64: # %bb.0: 791; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 792; X64-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03] 793; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 794; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 795; X64-NEXT: retq # encoding: [0xc3] 796 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 797 ret <16 x i16> %res 798} 799 800define <16 x i16>@test_int_x86_avx512_maskz_pshufh_w_256(<16 x i16> %x0, i16 %x3) { 801; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_256: 802; X86: # %bb.0: 803; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 804; X86-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] 805; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 806; X86-NEXT: retl # encoding: [0xc3] 807; 808; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_256: 809; X64: # %bb.0: 810; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 811; X64-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] 812; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] 813; X64-NEXT: retq # encoding: [0xc3] 814 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 815 ret <16 x i16> %res 816} 817 818declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8) 819 820define <8 x i16>@test_int_x86_avx512_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) { 821; CHECK-LABEL: test_int_x86_avx512_pshufl_w_128: 822; CHECK: # %bb.0: 823; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x03] 824; CHECK-NEXT: # xmm0 = xmm0[3,0,0,0,4,5,6,7] 825; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 826 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 827 ret <8 x i16> %res 828} 829 830define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 831; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128: 832; X86: # %bb.0: 833; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 834; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 835; X86-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03] 836; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] 837; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 838; X86-NEXT: retl # encoding: [0xc3] 839; 840; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128: 841; X64: # %bb.0: 842; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 843; X64-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03] 844; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] 845; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 846; X64-NEXT: retq # encoding: [0xc3] 847 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 848 ret <8 x i16> %res 849} 850 851define <8 x i16>@test_int_x86_avx512_maskz_pshufl_w_128(<8 x i16> %x0, i8 %x3) { 852; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_128: 853; X86: # %bb.0: 854; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 855; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 856; X86-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] 857; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] 858; X86-NEXT: retl # encoding: [0xc3] 859; 860; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_128: 861; X64: # %bb.0: 862; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 863; X64-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] 864; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] 865; X64-NEXT: retq # encoding: [0xc3] 866 %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 867 ret <8 x i16> 
%res 868} 869 870declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16) 871 872define <16 x i16>@test_int_x86_avx512_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 873; CHECK-LABEL: test_int_x86_avx512_pshufl_w_256: 874; CHECK: # %bb.0: 875; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xc0,0x03] 876; CHECK-NEXT: # ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 877; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 878 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 879 ret <16 x i16> %res 880} 881 882define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 883; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256: 884; X86: # %bb.0: 885; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 886; X86-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03] 887; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 888; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 889; X86-NEXT: retl # encoding: [0xc3] 890; 891; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256: 892; X64: # %bb.0: 893; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 894; X64-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03] 895; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 896; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 897; X64-NEXT: retq # encoding: [0xc3] 898 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 899 ret <16 x i16> %res 900} 901 902define <16 x i16>@test_int_x86_avx512_maskz_pshufl_w_256(<16 x i16> %x0, i16 %x3) { 903; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_256: 904; X86: # %bb.0: 905; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 906; X86-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] 907; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 908; X86-NEXT: retl # encoding: [0xc3] 909; 910; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_256: 911; X64: # %bb.0: 912; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 913; X64-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] 914; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] 915; X64-NEXT: retq # encoding: [0xc3] 916 %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 917 ret <16 x i16> %res 918} 919 920define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) { 921; CHECK-LABEL: test_pcmpeq_b_256: 922; CHECK: # %bb.0: 923; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 924; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 925; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 926; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 927 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) 928 ret i32 %res 929} 930 931define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 932; X86-LABEL: test_mask_pcmpeq_b_256: 933; X86: # %bb.0: 934; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 935; X86-NEXT: kmovd %k0, %eax # 
encoding: [0xc5,0xfb,0x93,0xc0] 936; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 937; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 938; X86-NEXT: retl # encoding: [0xc3] 939; 940; X64-LABEL: test_mask_pcmpeq_b_256: 941; X64: # %bb.0: 942; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 943; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 944; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 945; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 946; X64-NEXT: retq # encoding: [0xc3] 947 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) 948 ret i32 %res 949} 950 951declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32) 952 953define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) { 954; CHECK-LABEL: test_pcmpeq_w_256: 955; CHECK: # %bb.0: 956; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 957; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 958; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 959; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 960; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 961 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) 962 ret i16 %res 963} 964 965define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 966; X86-LABEL: test_mask_pcmpeq_w_256: 967; X86: # %bb.0: 968; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 969; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 970; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 971; X86-NEXT: # kill: def $ax killed $ax killed $eax 972; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 973; X86-NEXT: retl # encoding: [0xc3] 974; 975; X64-LABEL: test_mask_pcmpeq_w_256: 976; X64: # %bb.0: 977; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 978; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 979; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 980; X64-NEXT: # kill: def $ax killed $ax killed $eax 981; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 982; X64-NEXT: retq # encoding: [0xc3] 983 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) 984 ret i16 %res 985} 986 987declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16) 988 989define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) { 990; CHECK-LABEL: test_pcmpgt_b_256: 991; CHECK: # %bb.0: 992; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 993; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 994; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 995; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 996 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) 997 ret i32 %res 998} 999 1000define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 1001; X86-LABEL: test_mask_pcmpgt_b_256: 1002; X86: # %bb.0: 1003; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 1004; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1005; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 1006; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1007; X86-NEXT: retl # encoding: [0xc3] 1008; 1009; X64-LABEL: test_mask_pcmpgt_b_256: 1010; X64: # %bb.0: 1011; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # 
encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 1012; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1013; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 1014; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1015; X64-NEXT: retq # encoding: [0xc3] 1016 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) 1017 ret i32 %res 1018} 1019 1020declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32) 1021 1022define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) { 1023; CHECK-LABEL: test_pcmpgt_w_256: 1024; CHECK: # %bb.0: 1025; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 1026; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1027; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 1028; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1029; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1030 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) 1031 ret i16 %res 1032} 1033 1034define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1035; X86-LABEL: test_mask_pcmpgt_w_256: 1036; X86: # %bb.0: 1037; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 1038; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1039; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 1040; X86-NEXT: # kill: def $ax killed $ax killed $eax 1041; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1042; X86-NEXT: retl # encoding: [0xc3] 1043; 1044; X64-LABEL: test_mask_pcmpgt_w_256: 1045; X64: # %bb.0: 1046; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] 1047; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1048; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 1049; X64-NEXT: # kill: def $ax killed $ax killed $eax 1050; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1051; X64-NEXT: retq # encoding: [0xc3] 1052 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) 1053 ret i16 %res 1054} 1055 1056declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16) 1057 1058define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) { 1059; CHECK-LABEL: test_pcmpeq_b_128: 1060; CHECK: # %bb.0: 1061; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 1062; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1063; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 1064; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1065 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 1066 ret i16 %res 1067} 1068 1069define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 1070; X86-LABEL: test_mask_pcmpeq_b_128: 1071; X86: # %bb.0: 1072; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 1073; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1074; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 1075; X86-NEXT: # kill: def $ax killed $ax killed $eax 1076; X86-NEXT: retl # encoding: [0xc3] 1077; 1078; X64-LABEL: test_mask_pcmpeq_b_128: 1079; X64: # %bb.0: 1080; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 1081; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1082; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 1083; X64-NEXT: # kill: def $ax killed $ax killed $eax 1084; X64-NEXT: retq # encoding: 
[0xc3] 1085 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 1086 ret i16 %res 1087} 1088 1089declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16) 1090 1091define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) { 1092; CHECK-LABEL: test_pcmpeq_w_128: 1093; CHECK: # %bb.0: 1094; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 1095; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1096; CHECK-NEXT: # kill: def $al killed $al killed $eax 1097; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1098 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 1099 ret i8 %res 1100} 1101 1102define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1103; X86-LABEL: test_mask_pcmpeq_w_128: 1104; X86: # %bb.0: 1105; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 1106; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1107; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 1108; X86-NEXT: # kill: def $al killed $al killed $eax 1109; X86-NEXT: retl # encoding: [0xc3] 1110; 1111; X64-LABEL: test_mask_pcmpeq_w_128: 1112; X64: # %bb.0: 1113; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 1114; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1115; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 1116; X64-NEXT: # kill: def $al killed $al killed $eax 1117; X64-NEXT: retq # encoding: [0xc3] 1118 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 1119 ret i8 %res 1120} 1121 1122declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8) 1123 1124define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) { 1125; CHECK-LABEL: test_pcmpgt_b_128: 1126; CHECK: # %bb.0: 1127; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 1128; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1129; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 1130; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1131 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 1132 ret i16 %res 1133} 1134 1135define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 1136; X86-LABEL: test_mask_pcmpgt_b_128: 1137; X86: # %bb.0: 1138; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 1139; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1140; X86-NEXT: andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04] 1141; X86-NEXT: # kill: def $ax killed $ax killed $eax 1142; X86-NEXT: retl # encoding: [0xc3] 1143; 1144; X64-LABEL: test_mask_pcmpgt_b_128: 1145; X64: # %bb.0: 1146; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 1147; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1148; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 1149; X64-NEXT: # kill: def $ax killed $ax killed $eax 1150; X64-NEXT: retq # encoding: [0xc3] 1151 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 1152 ret i16 %res 1153} 1154 1155declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16) 1156 1157define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) { 1158; CHECK-LABEL: test_pcmpgt_w_128: 1159; CHECK: # %bb.0: 1160; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 1161; CHECK-NEXT: kmovd 
%k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1162; CHECK-NEXT: # kill: def $al killed $al killed $eax 1163; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1164 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 1165 ret i8 %res 1166} 1167 1168define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1169; X86-LABEL: test_mask_pcmpgt_w_128: 1170; X86: # %bb.0: 1171; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 1172; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1173; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04] 1174; X86-NEXT: # kill: def $al killed $al killed $eax 1175; X86-NEXT: retl # encoding: [0xc3] 1176; 1177; X64-LABEL: test_mask_pcmpgt_w_128: 1178; X64: # %bb.0: 1179; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 1180; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1181; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8] 1182; X64-NEXT: # kill: def $al killed $al killed $eax 1183; X64-NEXT: retq # encoding: [0xc3] 1184 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 1185 ret i8 %res 1186} 1187 1188declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8) 1189 1190declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 1191 1192define <16 x i8>@test_int_x86_avx512_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) { 1193; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_128: 1194; CHECK: # %bb.0: 1195; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1] 1196; CHECK-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1197; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1198 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 1199 ret <16 x i8> %res 1200} 1201 1202define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1203; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: 1204; X86: # %bb.0: 1205; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1206; X86-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1] 1207; X86-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1208; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1209; X86-NEXT: retl # encoding: [0xc3] 1210; 1211; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: 1212; X64: # %bb.0: 1213; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1214; X64-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1] 1215; X64-NEXT: # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 1216; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1217; X64-NEXT: retq # encoding: [0xc3] 1218 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 1219 ret <16 x i8> %res 1220} 1221 1222declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, 
i16) 1223 1224define <16 x i8>@test_int_x86_avx512_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) { 1225; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_128: 1226; CHECK: # %bb.0: 1227; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1] 1228; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1229; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1230 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 1231 ret <16 x i8> %res 1232} 1233 1234define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1235; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: 1236; X86: # %bb.0: 1237; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1238; X86-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1] 1239; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1240; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1241; X86-NEXT: retl # encoding: [0xc3] 1242; 1243; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: 1244; X64: # %bb.0: 1245; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1246; X64-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1] 1247; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1248; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1249; X64-NEXT: retq # encoding: [0xc3] 1250 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 1251 ret <16 x i8> %res 1252} 1253 1254declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 1255 1256define <32 x i8>@test_int_x86_avx512_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 1257; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_256: 1258; CHECK: # %bb.0: 1259; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xc1] 1260; CHECK-NEXT: # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1261; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1262 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 1263 ret <32 x i8> %res 1264} 1265 1266define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1267; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: 1268; X86: # %bb.0: 1269; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1270; X86-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1] 1271; X86-NEXT: # ymm2 {%k1} =
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1272; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1273; X86-NEXT: retl # encoding: [0xc3] 1274; 1275; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: 1276; X64: # %bb.0: 1277; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1278; X64-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1] 1279; X64-NEXT: # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 1280; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1281; X64-NEXT: retq # encoding: [0xc3] 1282 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 1283 ret <32 x i8> %res 1284} 1285 1286declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 1287 1288define <32 x i8>@test_int_x86_avx512_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 1289; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_256: 1290; CHECK: # %bb.0: 1291; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xc1] 1292; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1293; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1294 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 1295 ret <32 x i8> %res 1296} 1297 1298define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1299; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: 1300; X86: # %bb.0: 1301; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1302; X86-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1] 1303; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1304; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1305; X86-NEXT: retl # encoding: [0xc3] 1306; 1307; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: 1308; X64: # %bb.0: 1309; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1310; X64-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1] 1311; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 1312; X64-NEXT: 
vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1313; X64-NEXT: retq # encoding: [0xc3] 1314 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 1315 ret <32 x i8> %res 1316} 1317 1318declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1319 1320define <8 x i16>@test_int_x86_avx512_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1321; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_128: 1322; CHECK: # %bb.0: 1323; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1] 1324; CHECK-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1325; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1326 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1327 ret <8 x i16> %res 1328} 1329 1330define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1331; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: 1332; X86: # %bb.0: 1333; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1334; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1335; X86-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1] 1336; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1337; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1338; X86-NEXT: retl # encoding: [0xc3] 1339; 1340; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: 1341; X64: # %bb.0: 1342; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1343; X64-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1] 1344; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1345; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1346; X64-NEXT: retq # encoding: [0xc3] 1347 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1348 ret <8 x i16> %res 1349} 1350 1351declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1352 1353define <8 x i16>@test_int_x86_avx512_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1354; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_128: 1355; CHECK: # %bb.0: 1356; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1] 1357; CHECK-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1358; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1359 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1360 ret <8 x i16> %res 1361} 1362 1363define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1364; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: 1365; X86: # %bb.0: 1366; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1367; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1368; X86-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1] 1369; X86-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1370; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6f,0xc2] 1371; X86-NEXT: retl # encoding: [0xc3] 1372; 1373; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: 1374; X64: # %bb.0: 1375; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1376; X64-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1] 1377; X64-NEXT: # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1378; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1379; X64-NEXT: retq # encoding: [0xc3] 1380 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1381 ret <8 x i16> %res 1382} 1383 1384declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1385 1386define <16 x i16>@test_int_x86_avx512_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1387; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_256: 1388; CHECK: # %bb.0: 1389; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xc1] 1390; CHECK-NEXT: # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1391; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1392 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1393 ret <16 x i16> %res 1394} 1395 1396define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1397; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: 1398; X86: # %bb.0: 1399; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1400; X86-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1] 1401; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1402; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1403; X86-NEXT: retl # encoding: [0xc3] 1404; 1405; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: 1406; X64: # %bb.0: 1407; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1408; X64-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1] 1409; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 1410; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1411; X64-NEXT: retq # encoding: [0xc3] 1412 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1413 ret <16 x i16> %res 1414} 1415 1416declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1417 1418define <16 x i16>@test_int_x86_avx512_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1419; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_256: 1420; CHECK: # %bb.0: 1421; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xc1] 1422; CHECK-NEXT: # ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1423; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1424 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x 
i16> %x2, i16 -1) 1425 ret <16 x i16> %res 1426} 1427 1428define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1429; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: 1430; X86: # %bb.0: 1431; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1432; X86-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1] 1433; X86-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1434; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1435; X86-NEXT: retl # encoding: [0xc3] 1436; 1437; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: 1438; X64: # %bb.0: 1439; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1440; X64-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1] 1441; X64-NEXT: # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 1442; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1443; X64-NEXT: retq # encoding: [0xc3] 1444 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1445 ret <16 x i16> %res 1446} 1447 1448define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1449; CHECK-LABEL: test_mask_add_epi16_rr_128: 1450; CHECK: # %bb.0: 1451; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] 1452; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1453 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1454 ret <8 x i16> %res 1455} 1456 1457define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1458; X86-LABEL: test_mask_add_epi16_rrk_128: 1459; X86: # %bb.0: 1460; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1461; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1462; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 1463; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1464; X86-NEXT: retl # encoding: [0xc3] 1465; 1466; X64-LABEL: test_mask_add_epi16_rrk_128: 1467; X64: # %bb.0: 1468; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1469; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 1470; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1471; X64-NEXT: retq # encoding: [0xc3] 1472 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1473 ret <8 x i16> %res 1474} 1475 1476define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1477; X86-LABEL: test_mask_add_epi16_rrkz_128: 1478; X86: # %bb.0: 1479; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1480; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1481; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 1482; X86-NEXT: retl # encoding: [0xc3] 1483; 1484; X64-LABEL: test_mask_add_epi16_rrkz_128: 1485; X64: # %bb.0: 1486; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] 1487; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 1488; X64-NEXT: retq # encoding: [0xc3] 1489 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1490 ret <8 x i16> %res 1491} 1492 1493define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1494; X86-LABEL: test_mask_add_epi16_rm_128: 1495; X86: # %bb.0: 1496; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1497; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00] 1498; X86-NEXT: retl # encoding: [0xc3] 1499; 1500; X64-LABEL: test_mask_add_epi16_rm_128: 1501; X64: # %bb.0: 1502; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07] 1503; X64-NEXT: retq # encoding: [0xc3] 1504 %b = load <8 x i16>, <8 x i16>* %ptr_b 1505 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1506 ret <8 x i16> %res 1507} 1508 1509define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1510; X86-LABEL: test_mask_add_epi16_rmk_128: 1511; X86: # %bb.0: 1512; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1513; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1514; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1515; X86-NEXT: vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08] 1516; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1517; X86-NEXT: retl # encoding: [0xc3] 1518; 1519; X64-LABEL: test_mask_add_epi16_rmk_128: 1520; X64: # %bb.0: 1521; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1522; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] 1523; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1524; X64-NEXT: retq # encoding: [0xc3] 1525 %b = load <8 x i16>, <8 x i16>* %ptr_b 1526 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1527 ret <8 x i16> %res 1528} 1529 1530define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1531; X86-LABEL: test_mask_add_epi16_rmkz_128: 1532; X86: # %bb.0: 1533; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1534; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1535; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1536; X86-NEXT: vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00] 1537; X86-NEXT: retl # encoding: [0xc3] 1538; 1539; X64-LABEL: test_mask_add_epi16_rmkz_128: 1540; X64: # %bb.0: 1541; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1542; X64-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07] 1543; X64-NEXT: retq # encoding: [0xc3] 1544 %b = load <8 x i16>, <8 x i16>* %ptr_b 1545 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1546 ret <8 x i16> %res 1547} 1548 1549declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1550 1551define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1552; CHECK-LABEL: test_mask_add_epi16_rr_256: 1553; 
CHECK: # %bb.0: 1554; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] 1555; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1556 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1557 ret <16 x i16> %res 1558} 1559 1560define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1561; X86-LABEL: test_mask_add_epi16_rrk_256: 1562; X86: # %bb.0: 1563; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1564; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] 1565; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1566; X86-NEXT: retl # encoding: [0xc3] 1567; 1568; X64-LABEL: test_mask_add_epi16_rrk_256: 1569; X64: # %bb.0: 1570; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1571; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] 1572; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1573; X64-NEXT: retq # encoding: [0xc3] 1574 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1575 ret <16 x i16> %res 1576} 1577 1578define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1579; X86-LABEL: test_mask_add_epi16_rrkz_256: 1580; X86: # %bb.0: 1581; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1582; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] 1583; X86-NEXT: retl # encoding: [0xc3] 1584; 1585; X64-LABEL: test_mask_add_epi16_rrkz_256: 1586; X64: # %bb.0: 1587; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1588; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] 1589; X64-NEXT: retq # encoding: [0xc3] 1590 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1591 ret <16 x i16> %res 1592} 1593 1594define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1595; X86-LABEL: test_mask_add_epi16_rm_256: 1596; X86: # %bb.0: 1597; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1598; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00] 1599; X86-NEXT: retl # encoding: [0xc3] 1600; 1601; X64-LABEL: test_mask_add_epi16_rm_256: 1602; X64: # %bb.0: 1603; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07] 1604; X64-NEXT: retq # encoding: [0xc3] 1605 %b = load <16 x i16>, <16 x i16>* %ptr_b 1606 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1607 ret <16 x i16> %res 1608} 1609 1610define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1611; X86-LABEL: test_mask_add_epi16_rmk_256: 1612; X86: # %bb.0: 1613; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1614; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1615; X86-NEXT: vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08] 1616; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1617; X86-NEXT: retl # encoding: 
[0xc3] 1618; 1619; X64-LABEL: test_mask_add_epi16_rmk_256: 1620; X64: # %bb.0: 1621; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1622; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] 1623; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1624; X64-NEXT: retq # encoding: [0xc3] 1625 %b = load <16 x i16>, <16 x i16>* %ptr_b 1626 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1627 ret <16 x i16> %res 1628} 1629 1630define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1631; X86-LABEL: test_mask_add_epi16_rmkz_256: 1632; X86: # %bb.0: 1633; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1634; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1635; X86-NEXT: vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00] 1636; X86-NEXT: retl # encoding: [0xc3] 1637; 1638; X64-LABEL: test_mask_add_epi16_rmkz_256: 1639; X64: # %bb.0: 1640; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1641; X64-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07] 1642; X64-NEXT: retq # encoding: [0xc3] 1643 %b = load <16 x i16>, <16 x i16>* %ptr_b 1644 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1645 ret <16 x i16> %res 1646} 1647 1648declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1649 1650define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1651; CHECK-LABEL: test_mask_sub_epi16_rr_128: 1652; CHECK: # %bb.0: 1653; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] 1654; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1655 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1656 ret <8 x i16> %res 1657} 1658 1659define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1660; X86-LABEL: test_mask_sub_epi16_rrk_128: 1661; X86: # %bb.0: 1662; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1663; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1664; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] 1665; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1666; X86-NEXT: retl # encoding: [0xc3] 1667; 1668; X64-LABEL: test_mask_sub_epi16_rrk_128: 1669; X64: # %bb.0: 1670; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1671; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] 1672; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1673; X64-NEXT: retq # encoding: [0xc3] 1674 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1675 ret <8 x i16> %res 1676} 1677 1678define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1679; X86-LABEL: test_mask_sub_epi16_rrkz_128: 1680; X86: # %bb.0: 1681; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1682; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1683; X86-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0x89,0xf9,0xc1] 1684; X86-NEXT: retl # encoding: [0xc3] 1685; 1686; X64-LABEL: test_mask_sub_epi16_rrkz_128: 1687; X64: # %bb.0: 1688; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1689; X64-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1] 1690; X64-NEXT: retq # encoding: [0xc3] 1691 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1692 ret <8 x i16> %res 1693} 1694 1695define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1696; X86-LABEL: test_mask_sub_epi16_rm_128: 1697; X86: # %bb.0: 1698; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1699; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00] 1700; X86-NEXT: retl # encoding: [0xc3] 1701; 1702; X64-LABEL: test_mask_sub_epi16_rm_128: 1703; X64: # %bb.0: 1704; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07] 1705; X64-NEXT: retq # encoding: [0xc3] 1706 %b = load <8 x i16>, <8 x i16>* %ptr_b 1707 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1708 ret <8 x i16> %res 1709} 1710 1711define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1712; X86-LABEL: test_mask_sub_epi16_rmk_128: 1713; X86: # %bb.0: 1714; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1715; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1716; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1717; X86-NEXT: vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08] 1718; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1719; X86-NEXT: retl # encoding: [0xc3] 1720; 1721; X64-LABEL: test_mask_sub_epi16_rmk_128: 1722; X64: # %bb.0: 1723; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1724; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] 1725; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1726; X64-NEXT: retq # encoding: [0xc3] 1727 %b = load <8 x i16>, <8 x i16>* %ptr_b 1728 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1729 ret <8 x i16> %res 1730} 1731 1732define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1733; X86-LABEL: test_mask_sub_epi16_rmkz_128: 1734; X86: # %bb.0: 1735; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1736; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1737; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1738; X86-NEXT: vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00] 1739; X86-NEXT: retl # encoding: [0xc3] 1740; 1741; X64-LABEL: test_mask_sub_epi16_rmkz_128: 1742; X64: # %bb.0: 1743; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1744; X64-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07] 1745; X64-NEXT: retq # encoding: [0xc3] 1746 %b = load <8 x i16>, <8 x i16>* %ptr_b 1747 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1748 ret <8 x i16> %res 1749} 1750 1751declare <8 x i16> 
@llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1752 1753define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1754; CHECK-LABEL: test_mask_sub_epi16_rr_256: 1755; CHECK: # %bb.0: 1756; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1] 1757; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1758 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1759 ret <16 x i16> %res 1760} 1761 1762define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1763; X86-LABEL: test_mask_sub_epi16_rrk_256: 1764; X86: # %bb.0: 1765; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1766; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] 1767; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1768; X86-NEXT: retl # encoding: [0xc3] 1769; 1770; X64-LABEL: test_mask_sub_epi16_rrk_256: 1771; X64: # %bb.0: 1772; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1773; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] 1774; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1775; X64-NEXT: retq # encoding: [0xc3] 1776 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1777 ret <16 x i16> %res 1778} 1779 1780define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1781; X86-LABEL: test_mask_sub_epi16_rrkz_256: 1782; X86: # %bb.0: 1783; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1784; X86-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] 1785; X86-NEXT: retl # encoding: [0xc3] 1786; 1787; X64-LABEL: test_mask_sub_epi16_rrkz_256: 1788; X64: # %bb.0: 1789; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1790; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] 1791; X64-NEXT: retq # encoding: [0xc3] 1792 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1793 ret <16 x i16> %res 1794} 1795 1796define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1797; X86-LABEL: test_mask_sub_epi16_rm_256: 1798; X86: # %bb.0: 1799; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1800; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00] 1801; X86-NEXT: retl # encoding: [0xc3] 1802; 1803; X64-LABEL: test_mask_sub_epi16_rm_256: 1804; X64: # %bb.0: 1805; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07] 1806; X64-NEXT: retq # encoding: [0xc3] 1807 %b = load <16 x i16>, <16 x i16>* %ptr_b 1808 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1809 ret <16 x i16> %res 1810} 1811 1812define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1813; X86-LABEL: test_mask_sub_epi16_rmk_256: 1814; X86: # %bb.0: 1815; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1816; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1817; 
X86-NEXT: vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08] 1818; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1819; X86-NEXT: retl # encoding: [0xc3] 1820; 1821; X64-LABEL: test_mask_sub_epi16_rmk_256: 1822; X64: # %bb.0: 1823; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1824; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] 1825; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1826; X64-NEXT: retq # encoding: [0xc3] 1827 %b = load <16 x i16>, <16 x i16>* %ptr_b 1828 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1829 ret <16 x i16> %res 1830} 1831 1832define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1833; X86-LABEL: test_mask_sub_epi16_rmkz_256: 1834; X86: # %bb.0: 1835; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1836; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1837; X86-NEXT: vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00] 1838; X86-NEXT: retl # encoding: [0xc3] 1839; 1840; X64-LABEL: test_mask_sub_epi16_rmkz_256: 1841; X64: # %bb.0: 1842; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1843; X64-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07] 1844; X64-NEXT: retq # encoding: [0xc3] 1845 %b = load <16 x i16>, <16 x i16>* %ptr_b 1846 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1847 ret <16 x i16> %res 1848} 1849 1850declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1851 1852define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1853; CHECK-LABEL: test_mask_add_epi16_rr_512: 1854; CHECK: # %bb.0: 1855; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 1856; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1857 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1858 ret <32 x i16> %res 1859} 1860 1861define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1862; X86-LABEL: test_mask_add_epi16_rrk_512: 1863; X86: # %bb.0: 1864; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1865; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] 1866; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1867; X86-NEXT: retl # encoding: [0xc3] 1868; 1869; X64-LABEL: test_mask_add_epi16_rrk_512: 1870; X64: # %bb.0: 1871; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1872; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] 1873; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1874; X64-NEXT: retq # encoding: [0xc3] 1875 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1876 ret <32 x i16> %res 1877} 1878 1879define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1880; X86-LABEL: test_mask_add_epi16_rrkz_512: 1881; X86: # %bb.0: 1882; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1883; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 1884; X86-NEXT: retl # encoding: [0xc3] 1885; 1886; X64-LABEL: test_mask_add_epi16_rrkz_512: 1887; X64: # %bb.0: 1888; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1889; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 1890; X64-NEXT: retq # encoding: [0xc3] 1891 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1892 ret <32 x i16> %res 1893} 1894 1895define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1896; X86-LABEL: test_mask_add_epi16_rm_512: 1897; X86: # %bb.0: 1898; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1899; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00] 1900; X86-NEXT: retl # encoding: [0xc3] 1901; 1902; X64-LABEL: test_mask_add_epi16_rm_512: 1903; X64: # %bb.0: 1904; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07] 1905; X64-NEXT: retq # encoding: [0xc3] 1906 %b = load <32 x i16>, <32 x i16>* %ptr_b 1907 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1908 ret <32 x i16> %res 1909} 1910 1911define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1912; X86-LABEL: test_mask_add_epi16_rmk_512: 1913; X86: # %bb.0: 1914; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1915; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1916; X86-NEXT: vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08] 1917; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1918; X86-NEXT: retl # encoding: [0xc3] 1919; 1920; X64-LABEL: test_mask_add_epi16_rmk_512: 1921; X64: # %bb.0: 1922; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1923; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f] 1924; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1925; X64-NEXT: retq # encoding: [0xc3] 1926 %b = load <32 x i16>, <32 x i16>* %ptr_b 1927 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1928 ret <32 x i16> %res 1929} 1930 1931define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1932; X86-LABEL: test_mask_add_epi16_rmkz_512: 1933; X86: # %bb.0: 1934; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1935; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1936; X86-NEXT: vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00] 1937; X86-NEXT: retl # encoding: [0xc3] 1938; 1939; X64-LABEL: test_mask_add_epi16_rmkz_512: 1940; X64: # %bb.0: 1941; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1942; X64-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07] 1943; X64-NEXT: retq # encoding: [0xc3] 1944 %b = load <32 x i16>, <32 x i16>* %ptr_b 1945 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1946 ret <32 x i16> %res 1947} 1948 1949declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 
x i16>, <32 x i16>, i32) 1950 1951define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1952; CHECK-LABEL: test_mask_sub_epi16_rr_512: 1953; CHECK: # %bb.0: 1954; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1] 1955; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1956 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1957 ret <32 x i16> %res 1958} 1959 1960define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1961; X86-LABEL: test_mask_sub_epi16_rrk_512: 1962; X86: # %bb.0: 1963; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1964; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 1965; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1966; X86-NEXT: retl # encoding: [0xc3] 1967; 1968; X64-LABEL: test_mask_sub_epi16_rrk_512: 1969; X64: # %bb.0: 1970; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1971; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 1972; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1973; X64-NEXT: retq # encoding: [0xc3] 1974 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1975 ret <32 x i16> %res 1976} 1977 1978define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1979; X86-LABEL: test_mask_sub_epi16_rrkz_512: 1980; X86: # %bb.0: 1981; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1982; X86-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 1983; X86-NEXT: retl # encoding: [0xc3] 1984; 1985; X64-LABEL: test_mask_sub_epi16_rrkz_512: 1986; X64: # %bb.0: 1987; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1988; X64-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 1989; X64-NEXT: retq # encoding: [0xc3] 1990 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1991 ret <32 x i16> %res 1992} 1993 1994define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1995; X86-LABEL: test_mask_sub_epi16_rm_512: 1996; X86: # %bb.0: 1997; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1998; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00] 1999; X86-NEXT: retl # encoding: [0xc3] 2000; 2001; X64-LABEL: test_mask_sub_epi16_rm_512: 2002; X64: # %bb.0: 2003; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07] 2004; X64-NEXT: retq # encoding: [0xc3] 2005 %b = load <32 x i16>, <32 x i16>* %ptr_b 2006 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2007 ret <32 x i16> %res 2008} 2009 2010define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 2011; X86-LABEL: test_mask_sub_epi16_rmk_512: 2012; X86: # %bb.0: 2013; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2014; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2015; X86-NEXT: vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08] 2016; 
X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2017; X86-NEXT: retl # encoding: [0xc3] 2018; 2019; X64-LABEL: test_mask_sub_epi16_rmk_512: 2020; X64: # %bb.0: 2021; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2022; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f] 2023; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2024; X64-NEXT: retq # encoding: [0xc3] 2025 %b = load <32 x i16>, <32 x i16>* %ptr_b 2026 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2027 ret <32 x i16> %res 2028} 2029 2030define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 2031; X86-LABEL: test_mask_sub_epi16_rmkz_512: 2032; X86: # %bb.0: 2033; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2034; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2035; X86-NEXT: vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00] 2036; X86-NEXT: retl # encoding: [0xc3] 2037; 2038; X64-LABEL: test_mask_sub_epi16_rmkz_512: 2039; X64: # %bb.0: 2040; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2041; X64-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07] 2042; X64-NEXT: retq # encoding: [0xc3] 2043 %b = load <32 x i16>, <32 x i16>* %ptr_b 2044 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2045 ret <32 x i16> %res 2046} 2047 2048declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2049 2050define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 2051; CHECK-LABEL: test_mask_mullo_epi16_rr_512: 2052; CHECK: # %bb.0: 2053; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1] 2054; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2055 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2056 ret <32 x i16> %res 2057} 2058 2059define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 2060; X86-LABEL: test_mask_mullo_epi16_rrk_512: 2061; X86: # %bb.0: 2062; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2063; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] 2064; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2065; X86-NEXT: retl # encoding: [0xc3] 2066; 2067; X64-LABEL: test_mask_mullo_epi16_rrk_512: 2068; X64: # %bb.0: 2069; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2070; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] 2071; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 2072; X64-NEXT: retq # encoding: [0xc3] 2073 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2074 ret <32 x i16> %res 2075} 2076 2077define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 2078; X86-LABEL: test_mask_mullo_epi16_rrkz_512: 2079; X86: # %bb.0: 2080; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2081; X86-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 2082; X86-NEXT: retl # encoding: [0xc3] 2083; 2084; X64-LABEL: test_mask_mullo_epi16_rrkz_512: 2085; X64: # %bb.0: 2086; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2087; X64-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 2088; X64-NEXT: retq # encoding: [0xc3] 2089 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2090 ret <32 x i16> %res 2091} 2092 2093define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 2094; X86-LABEL: test_mask_mullo_epi16_rm_512: 2095; X86: # %bb.0: 2096; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2097; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00] 2098; X86-NEXT: retl # encoding: [0xc3] 2099; 2100; X64-LABEL: test_mask_mullo_epi16_rm_512: 2101; X64: # %bb.0: 2102; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07] 2103; X64-NEXT: retq # encoding: [0xc3] 2104 %b = load <32 x i16>, <32 x i16>* %ptr_b 2105 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2106 ret <32 x i16> %res 2107} 2108 2109define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 2110; X86-LABEL: test_mask_mullo_epi16_rmk_512: 2111; X86: # %bb.0: 2112; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2113; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2114; X86-NEXT: vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08] 2115; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2116; X86-NEXT: retl # encoding: [0xc3] 2117; 2118; X64-LABEL: test_mask_mullo_epi16_rmk_512: 2119; X64: # %bb.0: 2120; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2121; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f] 2122; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 2123; X64-NEXT: retq # encoding: [0xc3] 2124 %b = load <32 x i16>, <32 x i16>* %ptr_b 2125 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2126 ret <32 x i16> %res 2127} 2128 2129define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 2130; X86-LABEL: test_mask_mullo_epi16_rmkz_512: 2131; X86: # %bb.0: 2132; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2133; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2134; X86-NEXT: vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00] 2135; X86-NEXT: retl # encoding: [0xc3] 2136; 2137; X64-LABEL: test_mask_mullo_epi16_rmkz_512: 2138; X64: # %bb.0: 2139; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2140; X64-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07] 2141; X64-NEXT: retq # encoding: [0xc3] 2142 %b = load <32 x i16>, <32 x i16>* %ptr_b 2143 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2144 ret <32 x i16> %res 2145} 2146 2147declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2148 2149define <8 x i16> 
@test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2150; CHECK-LABEL: test_mask_mullo_epi16_rr_128: 2151; CHECK: # %bb.0: 2152; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] 2153; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2154 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2155 ret <8 x i16> %res 2156} 2157 2158define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2159; X86-LABEL: test_mask_mullo_epi16_rrk_128: 2160; X86: # %bb.0: 2161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2162; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2163; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 2164; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2165; X86-NEXT: retl # encoding: [0xc3] 2166; 2167; X64-LABEL: test_mask_mullo_epi16_rrk_128: 2168; X64: # %bb.0: 2169; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2170; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 2171; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2172; X64-NEXT: retq # encoding: [0xc3] 2173 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2174 ret <8 x i16> %res 2175} 2176 2177define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2178; X86-LABEL: test_mask_mullo_epi16_rrkz_128: 2179; X86: # %bb.0: 2180; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2181; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2182; X86-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 2183; X86-NEXT: retl # encoding: [0xc3] 2184; 2185; X64-LABEL: test_mask_mullo_epi16_rrkz_128: 2186; X64: # %bb.0: 2187; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2188; X64-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 2189; X64-NEXT: retq # encoding: [0xc3] 2190 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2191 ret <8 x i16> %res 2192} 2193 2194define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2195; X86-LABEL: test_mask_mullo_epi16_rm_128: 2196; X86: # %bb.0: 2197; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2198; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00] 2199; X86-NEXT: retl # encoding: [0xc3] 2200; 2201; X64-LABEL: test_mask_mullo_epi16_rm_128: 2202; X64: # %bb.0: 2203; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07] 2204; X64-NEXT: retq # encoding: [0xc3] 2205 %b = load <8 x i16>, <8 x i16>* %ptr_b 2206 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2207 ret <8 x i16> %res 2208} 2209 2210define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2211; X86-LABEL: test_mask_mullo_epi16_rmk_128: 2212; X86: # %bb.0: 2213; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2214; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: 
[0x0f,0xb6,0x4c,0x24,0x08] 2215; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2216; X86-NEXT: vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08] 2217; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2218; X86-NEXT: retl # encoding: [0xc3] 2219; 2220; X64-LABEL: test_mask_mullo_epi16_rmk_128: 2221; X64: # %bb.0: 2222; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2223; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] 2224; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2225; X64-NEXT: retq # encoding: [0xc3] 2226 %b = load <8 x i16>, <8 x i16>* %ptr_b 2227 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2228 ret <8 x i16> %res 2229} 2230 2231define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2232; X86-LABEL: test_mask_mullo_epi16_rmkz_128: 2233; X86: # %bb.0: 2234; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2235; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 2236; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2237; X86-NEXT: vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00] 2238; X86-NEXT: retl # encoding: [0xc3] 2239; 2240; X64-LABEL: test_mask_mullo_epi16_rmkz_128: 2241; X64: # %bb.0: 2242; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2243; X64-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07] 2244; X64-NEXT: retq # encoding: [0xc3] 2245 %b = load <8 x i16>, <8 x i16>* %ptr_b 2246 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2247 ret <8 x i16> %res 2248} 2249 2250declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2251 2252define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2253; CHECK-LABEL: test_mask_mullo_epi16_rr_256: 2254; CHECK: # %bb.0: 2255; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1] 2256; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2257 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2258 ret <16 x i16> %res 2259} 2260 2261define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2262; X86-LABEL: test_mask_mullo_epi16_rrk_256: 2263; X86: # %bb.0: 2264; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2265; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2266; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2267; X86-NEXT: retl # encoding: [0xc3] 2268; 2269; X64-LABEL: test_mask_mullo_epi16_rrk_256: 2270; X64: # %bb.0: 2271; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2272; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2273; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2274; X64-NEXT: retq # encoding: [0xc3] 2275 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2276 ret <16 x i16> %res 2277} 2278 2279define <16 x i16> 
@test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2280; X86-LABEL: test_mask_mullo_epi16_rrkz_256: 2281; X86: # %bb.0: 2282; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2283; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2284; X86-NEXT: retl # encoding: [0xc3] 2285; 2286; X64-LABEL: test_mask_mullo_epi16_rrkz_256: 2287; X64: # %bb.0: 2288; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2289; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2290; X64-NEXT: retq # encoding: [0xc3] 2291 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2292 ret <16 x i16> %res 2293} 2294 2295define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2296; X86-LABEL: test_mask_mullo_epi16_rm_256: 2297; X86: # %bb.0: 2298; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2299; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00] 2300; X86-NEXT: retl # encoding: [0xc3] 2301; 2302; X64-LABEL: test_mask_mullo_epi16_rm_256: 2303; X64: # %bb.0: 2304; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07] 2305; X64-NEXT: retq # encoding: [0xc3] 2306 %b = load <16 x i16>, <16 x i16>* %ptr_b 2307 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2308 ret <16 x i16> %res 2309} 2310 2311define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2312; X86-LABEL: test_mask_mullo_epi16_rmk_256: 2313; X86: # %bb.0: 2314; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2315; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2316; X86-NEXT: vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08] 2317; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2318; X86-NEXT: retl # encoding: [0xc3] 2319; 2320; X64-LABEL: test_mask_mullo_epi16_rmk_256: 2321; X64: # %bb.0: 2322; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2323; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] 2324; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2325; X64-NEXT: retq # encoding: [0xc3] 2326 %b = load <16 x i16>, <16 x i16>* %ptr_b 2327 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2328 ret <16 x i16> %res 2329} 2330 2331define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2332; X86-LABEL: test_mask_mullo_epi16_rmkz_256: 2333; X86: # %bb.0: 2334; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2335; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2336; X86-NEXT: vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00] 2337; X86-NEXT: retl # encoding: [0xc3] 2338; 2339; X64-LABEL: test_mask_mullo_epi16_rmkz_256: 2340; X64: # %bb.0: 2341; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2342; X64-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] 2343; X64-NEXT: retq # encoding: [0xc3] 2344 %b = load 
<16 x i16>, <16 x i16>* %ptr_b 2345 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2346 ret <16 x i16> %res 2347} 2348 2349declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2350 2351declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2352 2353define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2354; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2355; X86: # %bb.0: 2356; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2357; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2358; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2359; X86-NEXT: retl # encoding: [0xc3] 2360; 2361; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 2362; X64: # %bb.0: 2363; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2364; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 2365; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2366; X64-NEXT: retq # encoding: [0xc3] 2367 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask) 2368 ret <16 x i8> %res 2369} 2370 2371define <16 x i8>@test_int_x86_avx512_maskz_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2372; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128: 2373; X86: # %bb.0: 2374; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2375; X86-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2376; X86-NEXT: retl # encoding: [0xc3] 2377; 2378; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128: 2379; X64: # %bb.0: 2380; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2381; X64-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 2382; X64-NEXT: retq # encoding: [0xc3] 2383 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2384 ret <16 x i8> %res 2385} 2386 2387declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2388 2389define <32 x i8>@test_int_x86_avx512_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2390; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_256: 2391; CHECK: # %bb.0: 2392; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1] 2393; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2394 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2395 ret <32 x i8> %res 2396} 2397 2398define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2399; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2400; X86: # %bb.0: 2401; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2402; X86-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2403; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2404; X86-NEXT: retl # encoding: [0xc3] 2405; 2406; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 2407; X64: # %bb.0: 2408; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2409; X64-NEXT: vpmaxsb 
%ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 2410; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2411; X64-NEXT: retq # encoding: [0xc3] 2412 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2413 ret <32 x i8> %res 2414} 2415 2416declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2417 2418define <8 x i16>@test_int_x86_avx512_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2419; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_128: 2420; CHECK: # %bb.0: 2421; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] 2422; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2423 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2424 ret <8 x i16> %res 2425} 2426 2427define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2428; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2429; X86: # %bb.0: 2430; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2431; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2432; X86-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2433; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2434; X86-NEXT: retl # encoding: [0xc3] 2435; 2436; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 2437; X64: # %bb.0: 2438; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2439; X64-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 2440; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2441; X64-NEXT: retq # encoding: [0xc3] 2442 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2443 ret <8 x i16> %res 2444} 2445 2446declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2447 2448define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2449; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2450; X86: # %bb.0: 2451; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2452; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2453; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2454; X86-NEXT: retl # encoding: [0xc3] 2455; 2456; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 2457; X64: # %bb.0: 2458; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2459; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 2460; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2461; X64-NEXT: retq # encoding: [0xc3] 2462 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2463 ret <16 x i16> %res 2464} 2465 2466define <16 x i16>@test_int_x86_avx512_maskz_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2467; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256: 2468; X86: # %bb.0: 2469; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2470; X86-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2471; X86-NEXT: 
retl # encoding: [0xc3] 2472; 2473; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256: 2474; X64: # %bb.0: 2475; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2476; X64-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 2477; X64-NEXT: retq # encoding: [0xc3] 2478 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2479 ret <16 x i16> %res 2480} 2481 2482declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2483 2484define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) { 2485; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2486; X86: # %bb.0: 2487; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2488; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2489; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2490; X86-NEXT: retl # encoding: [0xc3] 2491; 2492; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 2493; X64: # %bb.0: 2494; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2495; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 2496; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2497; X64-NEXT: retq # encoding: [0xc3] 2498 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2499 ret <16 x i8> %res 2500} 2501 2502define <16 x i8>@test_int_x86_avx512_maskz_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2503; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128: 2504; X86: # %bb.0: 2505; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2506; X86-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2507; X86-NEXT: retl # encoding: [0xc3] 2508; 2509; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128: 2510; X64: # %bb.0: 2511; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2512; X64-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 2513; X64-NEXT: retq # encoding: [0xc3] 2514 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2515 ret <16 x i8> %res 2516} 2517 2518declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2519 2520define <32 x i8>@test_int_x86_avx512_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2521; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_256: 2522; CHECK: # %bb.0: 2523; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1] 2524; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2525 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2526 ret <32 x i8> %res 2527} 2528 2529define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2530; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2531; X86: # %bb.0: 2532; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2533; X86-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2534; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2535; X86-NEXT: retl # encoding: [0xc3] 
2536; 2537; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 2538; X64: # %bb.0: 2539; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2540; X64-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 2541; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2542; X64-NEXT: retq # encoding: [0xc3] 2543 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2544 ret <32 x i8> %res 2545} 2546 2547declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2548 2549define <8 x i16>@test_int_x86_avx512_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2550; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_128: 2551; CHECK: # %bb.0: 2552; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1] 2553; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2554 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2555 ret <8 x i16> %res 2556} 2557 2558define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2559; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2560; X86: # %bb.0: 2561; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2562; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2563; X86-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2564; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2565; X86-NEXT: retl # encoding: [0xc3] 2566; 2567; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 2568; X64: # %bb.0: 2569; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2570; X64-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 2571; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2572; X64-NEXT: retq # encoding: [0xc3] 2573 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2574 ret <8 x i16> %res 2575} 2576 2577declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2578 2579define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2580; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2581; X86: # %bb.0: 2582; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2583; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2584; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2585; X86-NEXT: retl # encoding: [0xc3] 2586; 2587; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 2588; X64: # %bb.0: 2589; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2590; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 2591; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2592; X64-NEXT: retq # encoding: [0xc3] 2593 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2594 ret <16 x i16> %res 2595} 2596 2597define <16 x i16>@test_int_x86_avx512_maskz_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2598; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256: 2599; X86: # %bb.0: 2600; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2601; X86-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2602; X86-NEXT: retl # encoding: [0xc3] 2603; 2604; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256: 2605; X64: # %bb.0: 2606; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2607; X64-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 2608; X64-NEXT: retq # encoding: [0xc3] 2609 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2610 ret <16 x i16> %res 2611} 2612 2613declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2614 2615define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2616; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2617; X86: # %bb.0: 2618; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2619; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2620; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2621; X86-NEXT: retl # encoding: [0xc3] 2622; 2623; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128: 2624; X64: # %bb.0: 2625; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2626; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 2627; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2628; X64-NEXT: retq # encoding: [0xc3] 2629 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2630 ret <16 x i8> %res 2631} 2632 2633define <16 x i8>@test_int_x86_avx512_maskz_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2634; X86-LABEL: test_int_x86_avx512_maskz_pmins_b_128: 2635; X86: # %bb.0: 2636; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2637; X86-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2638; X86-NEXT: retl # encoding: [0xc3] 2639; 2640; X64-LABEL: test_int_x86_avx512_maskz_pmins_b_128: 2641; X64: # %bb.0: 2642; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2643; X64-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 2644; X64-NEXT: retq # encoding: [0xc3] 2645 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2646 ret <16 x i8> %res 2647} 2648 2649declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2650 2651define <32 x i8>@test_int_x86_avx512_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2652; CHECK-LABEL: test_int_x86_avx512_pmins_b_256: 2653; CHECK: # %bb.0: 2654; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1] 2655; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2656 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2657 ret <32 x i8> %res 2658} 2659 2660define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2661; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2662; X86: # %bb.0: 2663; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2664; X86-NEXT: vpminsb %ymm1, %ymm0, %ymm2 
{%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2665; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2666; X86-NEXT: retl # encoding: [0xc3] 2667; 2668; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256: 2669; X64: # %bb.0: 2670; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2671; X64-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 2672; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2673; X64-NEXT: retq # encoding: [0xc3] 2674 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2675 ret <32 x i8> %res 2676} 2677 2678declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2679 2680define <8 x i16>@test_int_x86_avx512_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2681; CHECK-LABEL: test_int_x86_avx512_pmins_w_128: 2682; CHECK: # %bb.0: 2683; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] 2684; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2685 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2686 ret <8 x i16> %res 2687} 2688 2689define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2690; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2691; X86: # %bb.0: 2692; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2693; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2694; X86-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2695; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2696; X86-NEXT: retl # encoding: [0xc3] 2697; 2698; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128: 2699; X64: # %bb.0: 2700; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2701; X64-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 2702; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2703; X64-NEXT: retq # encoding: [0xc3] 2704 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2705 ret <8 x i16> %res 2706} 2707 2708declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2709 2710define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2711; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2712; X86: # %bb.0: 2713; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2714; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2715; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2716; X86-NEXT: retl # encoding: [0xc3] 2717; 2718; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256: 2719; X64: # %bb.0: 2720; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2721; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 2722; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2723; X64-NEXT: retq # encoding: [0xc3] 2724 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2725 ret <16 x i16> %res 2726} 2727 2728define <16 x 
i16>@test_int_x86_avx512_maskz_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2729; X86-LABEL: test_int_x86_avx512_maskz_pmins_w_256: 2730; X86: # %bb.0: 2731; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2732; X86-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2733; X86-NEXT: retl # encoding: [0xc3] 2734; 2735; X64-LABEL: test_int_x86_avx512_maskz_pmins_w_256: 2736; X64: # %bb.0: 2737; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2738; X64-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 2739; X64-NEXT: retq # encoding: [0xc3] 2740 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2741 ret <16 x i16> %res 2742} 2743 2744declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2745 2746define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 2747; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2748; X86: # %bb.0: 2749; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2750; X86-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2751; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2752; X86-NEXT: retl # encoding: [0xc3] 2753; 2754; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128: 2755; X64: # %bb.0: 2756; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2757; X64-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 2758; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2759; X64-NEXT: retq # encoding: [0xc3] 2760 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 2761 ret <16 x i8> %res 2762} 2763 2764define <16 x i8>@test_int_x86_avx512_maskz_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { 2765; X86-LABEL: test_int_x86_avx512_maskz_pminu_b_128: 2766; X86: # %bb.0: 2767; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2768; X86-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2769; X86-NEXT: retl # encoding: [0xc3] 2770; 2771; X64-LABEL: test_int_x86_avx512_maskz_pminu_b_128: 2772; X64: # %bb.0: 2773; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2774; X64-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 2775; X64-NEXT: retq # encoding: [0xc3] 2776 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 2777 ret <16 x i8> %res 2778} 2779 2780declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2781 2782define <32 x i8>@test_int_x86_avx512_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 2783; CHECK-LABEL: test_int_x86_avx512_pminu_b_256: 2784; CHECK: # %bb.0: 2785; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1] 2786; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2787 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 2788 ret <32 x i8> %res 2789} 2790 2791define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 2792; X86-LABEL: 
test_int_x86_avx512_mask_pminu_b_256: 2793; X86: # %bb.0: 2794; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2795; X86-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2796; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2797; X86-NEXT: retl # encoding: [0xc3] 2798; 2799; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256: 2800; X64: # %bb.0: 2801; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2802; X64-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 2803; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2804; X64-NEXT: retq # encoding: [0xc3] 2805 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 2806 ret <32 x i8> %res 2807} 2808 2809declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2810 2811define <8 x i16>@test_int_x86_avx512_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2812; CHECK-LABEL: test_int_x86_avx512_pminu_w_128: 2813; CHECK: # %bb.0: 2814; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1] 2815; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2816 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2817 ret <8 x i16> %res 2818} 2819 2820define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2821; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2822; X86: # %bb.0: 2823; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2824; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2825; X86-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2826; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2827; X86-NEXT: retl # encoding: [0xc3] 2828; 2829; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128: 2830; X64: # %bb.0: 2831; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2832; X64-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 2833; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2834; X64-NEXT: retq # encoding: [0xc3] 2835 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2836 ret <8 x i16> %res 2837} 2838 2839declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2840 2841define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 2842; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2843; X86: # %bb.0: 2844; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2845; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2846; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2847; X86-NEXT: retl # encoding: [0xc3] 2848; 2849; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256: 2850; X64: # %bb.0: 2851; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2852; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 2853; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2854; X64-NEXT: retq # 
encoding: [0xc3] 2855 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 2856 ret <16 x i16> %res 2857} 2858 2859define <16 x i16>@test_int_x86_avx512_maskz_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) { 2860; X86-LABEL: test_int_x86_avx512_maskz_pminu_w_256: 2861; X86: # %bb.0: 2862; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2863; X86-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2864; X86-NEXT: retl # encoding: [0xc3] 2865; 2866; X64-LABEL: test_int_x86_avx512_maskz_pminu_w_256: 2867; X64: # %bb.0: 2868; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2869; X64-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 2870; X64-NEXT: retq # encoding: [0xc3] 2871 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 2872 ret <16 x i16> %res 2873} 2874 2875declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2876 2877define <8 x i16>@test_int_x86_avx512_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2878; CHECK-LABEL: test_int_x86_avx512_psrl_w_128: 2879; CHECK: # %bb.0: 2880; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] 2881; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2882 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2883 ret <8 x i16> %res 2884} 2885 2886define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2887; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2888; X86: # %bb.0: 2889; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2890; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2891; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2892; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2893; X86-NEXT: retl # encoding: [0xc3] 2894; 2895; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128: 2896; X64: # %bb.0: 2897; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2898; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 2899; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2900; X64-NEXT: retq # encoding: [0xc3] 2901 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2902 ret <8 x i16> %res 2903} 2904 2905 2906define <8 x i16>@test_int_x86_avx512_maskz_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 2907; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_128: 2908; X86: # %bb.0: 2909; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2910; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2911; X86-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2912; X86-NEXT: retl # encoding: [0xc3] 2913; 2914; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_128: 2915; X64: # %bb.0: 2916; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2917; X64-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] 2918; X64-NEXT: retq # encoding: [0xc3] 2919 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> 
zeroinitializer, i8 %x3) 2920 ret <8 x i16> %res 2921} 2922 2923declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 2924 2925define <16 x i16>@test_int_x86_avx512_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 2926; CHECK-LABEL: test_int_x86_avx512_psrl_w_256: 2927; CHECK: # %bb.0: 2928; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1] 2929; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2930 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 2931 ret <16 x i16> %res 2932} 2933 2934define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2935; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2936; X86: # %bb.0: 2937; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2938; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2939; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2940; X86-NEXT: retl # encoding: [0xc3] 2941; 2942; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256: 2943; X64: # %bb.0: 2944; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2945; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 2946; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2947; X64-NEXT: retq # encoding: [0xc3] 2948 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 2949 ret <16 x i16> %res 2950} 2951 2952define <16 x i16>@test_int_x86_avx512_maskz_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 2953; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_256: 2954; X86: # %bb.0: 2955; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2956; X86-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2957; X86-NEXT: retl # encoding: [0xc3] 2958; 2959; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_256: 2960; X64: # %bb.0: 2961; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2962; X64-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] 2963; X64-NEXT: retq # encoding: [0xc3] 2964 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2965 ret <16 x i16> %res 2966} 2967 2968declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2969 2970define <8 x i16>@test_int_x86_avx512_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2971; CHECK-LABEL: test_int_x86_avx512_psra_w_128: 2972; CHECK: # %bb.0: 2973; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] 2974; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2975 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2976 ret <8 x i16> %res 2977} 2978 2979define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2980; X86-LABEL: test_int_x86_avx512_mask_psra_w_128: 2981; X86: # %bb.0: 2982; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2983; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2984; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2985; X86-NEXT: vmovdqa %xmm2, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2986; X86-NEXT: retl # encoding: [0xc3] 2987; 2988; X64-LABEL: test_int_x86_avx512_mask_psra_w_128: 2989; X64: # %bb.0: 2990; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2991; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 2992; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2993; X64-NEXT: retq # encoding: [0xc3] 2994 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2995 ret <8 x i16> %res 2996} 2997 2998define <8 x i16>@test_int_x86_avx512_maskz_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 2999; X86-LABEL: test_int_x86_avx512_maskz_psra_w_128: 3000; X86: # %bb.0: 3001; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3002; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3003; X86-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 3004; X86-NEXT: retl # encoding: [0xc3] 3005; 3006; X64-LABEL: test_int_x86_avx512_maskz_psra_w_128: 3007; X64: # %bb.0: 3008; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3009; X64-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] 3010; X64-NEXT: retq # encoding: [0xc3] 3011 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 3012 ret <8 x i16> %res 3013} 3014 3015declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 3016 3017define <16 x i16>@test_int_x86_avx512_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 3018; CHECK-LABEL: test_int_x86_avx512_psra_w_256: 3019; CHECK: # %bb.0: 3020; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1] 3021; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3022 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 3023 ret <16 x i16> %res 3024} 3025 3026define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3027; X86-LABEL: test_int_x86_avx512_mask_psra_w_256: 3028; X86: # %bb.0: 3029; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3030; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 3031; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3032; X86-NEXT: retl # encoding: [0xc3] 3033; 3034; X64-LABEL: test_int_x86_avx512_mask_psra_w_256: 3035; X64: # %bb.0: 3036; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3037; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 3038; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3039; X64-NEXT: retq # encoding: [0xc3] 3040 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 3041 ret <16 x i16> %res 3042} 3043 3044define <16 x i16>@test_int_x86_avx512_maskz_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 3045; X86-LABEL: test_int_x86_avx512_maskz_psra_w_256: 3046; X86: # %bb.0: 3047; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3048; X86-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 3049; X86-NEXT: retl # encoding: [0xc3] 3050; 
3051; X64-LABEL: test_int_x86_avx512_maskz_psra_w_256: 3052; X64: # %bb.0: 3053; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3054; X64-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] 3055; X64-NEXT: retq # encoding: [0xc3] 3056 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 3057 ret <16 x i16> %res 3058} 3059 3060declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3061 3062define <8 x i16>@test_int_x86_avx512_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 3063; CHECK-LABEL: test_int_x86_avx512_psll_w_128: 3064; CHECK: # %bb.0: 3065; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] 3066; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3067 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 3068 ret <8 x i16> %res 3069} 3070 3071define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3072; X86-LABEL: test_int_x86_avx512_mask_psll_w_128: 3073; X86: # %bb.0: 3074; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3075; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3076; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 3077; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3078; X86-NEXT: retl # encoding: [0xc3] 3079; 3080; X64-LABEL: test_int_x86_avx512_mask_psll_w_128: 3081; X64: # %bb.0: 3082; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3083; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 3084; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3085; X64-NEXT: retq # encoding: [0xc3] 3086 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 3087 ret <8 x i16> %res 3088} 3089 3090define <8 x i16>@test_int_x86_avx512_maskz_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 3091; X86-LABEL: test_int_x86_avx512_maskz_psll_w_128: 3092; X86: # %bb.0: 3093; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3094; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3095; X86-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 3096; X86-NEXT: retl # encoding: [0xc3] 3097; 3098; X64-LABEL: test_int_x86_avx512_maskz_psll_w_128: 3099; X64: # %bb.0: 3100; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3101; X64-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] 3102; X64-NEXT: retq # encoding: [0xc3] 3103 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 3104 ret <8 x i16> %res 3105} 3106 3107declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 3108 3109define <16 x i16>@test_int_x86_avx512_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) { 3110; CHECK-LABEL: test_int_x86_avx512_psll_w_256: 3111; CHECK: # %bb.0: 3112; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1] 3113; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3114 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 3115 
ret <16 x i16> %res 3116} 3117 3118define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3119; X86-LABEL: test_int_x86_avx512_mask_psll_w_256: 3120; X86: # %bb.0: 3121; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3122; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 3123; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3124; X86-NEXT: retl # encoding: [0xc3] 3125; 3126; X64-LABEL: test_int_x86_avx512_mask_psll_w_256: 3127; X64: # %bb.0: 3128; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3129; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 3130; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3131; X64-NEXT: retq # encoding: [0xc3] 3132 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 3133 ret <16 x i16> %res 3134} 3135 3136define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) { 3137; X86-LABEL: test_int_x86_avx512_maskz_psll_w_256: 3138; X86: # %bb.0: 3139; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3140; X86-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 3141; X86-NEXT: retl # encoding: [0xc3] 3142; 3143; X64-LABEL: test_int_x86_avx512_maskz_psll_w_256: 3144; X64: # %bb.0: 3145; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3146; X64-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] 3147; X64-NEXT: retq # encoding: [0xc3] 3148 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 3149 ret <16 x i16> %res 3150} 3151 3152declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8) 3153 3154define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 3155; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 3156; X86: # %bb.0: 3157; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3158; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3159; X86-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] 3160; X86-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] 3161; X86-NEXT: vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] 3162; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3163; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3164; X86-NEXT: retl # encoding: [0xc3] 3165; 3166; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 3167; X64: # %bb.0: 3168; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3169; X64-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] 3170; X64-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] 3171; X64-NEXT: vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] 3172; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3173; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3174; X64-NEXT: retq # 
encoding: [0xc3] 3175 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 3176 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1) 3177 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3) 3178 %res3 = add <8 x i16> %res, %res1 3179 %res4 = add <8 x i16> %res2, %res3 3180 ret <8 x i16> %res4 3181} 3182 3183declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) 3184 3185define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 3186; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 3187; X86: # %bb.0: 3188; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3189; X86-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] 3190; X86-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] 3191; X86-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] 3192; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3193; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3194; X86-NEXT: retl # encoding: [0xc3] 3195; 3196; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 3197; X64: # %bb.0: 3198; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3199; X64-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] 3200; X64-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] 3201; X64-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] 3202; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3203; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3204; X64-NEXT: retq # encoding: [0xc3] 3205 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 3206 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1) 3207 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3) 3208 %res3 = add <16 x i16> %res, %res1 3209 %res4 = add <16 x i16> %res3, %res2 3210 ret <16 x i16> %res4 3211} 3212 3213declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) 3214 3215define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 3216; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128: 3217; X86: # %bb.0: 3218; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3219; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3220; X86-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] 3221; X86-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] 3222; X86-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] 3223; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3224; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3225; X86-NEXT: retl # encoding: [0xc3] 3226; 3227; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128: 3228; X64: 
# %bb.0: 3229; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3230; X64-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] 3231; X64-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] 3232; X64-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] 3233; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3234; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3235; X64-NEXT: retq # encoding: [0xc3] 3236 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 3237 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) 3238 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) 3239 %res3 = add <8 x i16> %res, %res1 3240 %res4 = add <8 x i16> %res3, %res2 3241 ret <8 x i16> %res4 3242} 3243 3244declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16) 3245 3246define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 3247; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256: 3248; X86: # %bb.0: 3249; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3250; X86-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] 3251; X86-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] 3252; X86-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] 3253; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3254; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3255; X86-NEXT: retl # encoding: [0xc3] 3256; 3257; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256: 3258; X64: # %bb.0: 3259; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3260; X64-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] 3261; X64-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] 3262; X64-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] 3263; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3264; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3265; X64-NEXT: retq # encoding: [0xc3] 3266 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 3267 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) 3268 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) 3269 %res3 = add <16 x i16> %res, %res1 3270 %res4 = add <16 x i16> %res3, %res2 3271 ret <16 x i16> %res4 3272} 3273 3274declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8) 3275 3276define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 3277; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128: 3278; X86: # %bb.0: 3279; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3280; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3281; 
X86-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] 3282; X86-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] 3283; X86-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] 3284; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3285; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3286; X86-NEXT: retl # encoding: [0xc3] 3287; 3288; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128: 3289; X64: # %bb.0: 3290; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3291; X64-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] 3292; X64-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] 3293; X64-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] 3294; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 3295; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] 3296; X64-NEXT: retq # encoding: [0xc3] 3297 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 3298 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) 3299 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) 3300 %res3 = add <8 x i16> %res, %res1 3301 %res4 = add <8 x i16> %res3, %res2 3302 ret <8 x i16> %res4 3303} 3304 3305declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16) 3306 3307define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 3308; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256: 3309; X86: # %bb.0: 3310; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3311; X86-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] 3312; X86-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] 3313; X86-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] 3314; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3315; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3316; X86-NEXT: retl # encoding: [0xc3] 3317; 3318; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256: 3319; X64: # %bb.0: 3320; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3321; X64-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] 3322; X64-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] 3323; X64-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] 3324; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 3325; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] 3326; X64-NEXT: retq # encoding: [0xc3] 3327 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 3328 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) 3329 %res2 = call <16 x i16> 
@llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) 3330 %res3 = add <16 x i16> %res, %res1 3331 %res4 = add <16 x i16> %res3, %res2 3332 ret <16 x i16> %res4 3333} 3334 3335declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 3336 3337define <16 x i8>@test_int_x86_avx512_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) { 3338; CHECK-LABEL: test_int_x86_avx512_pshuf_b_128: 3339; CHECK: # %bb.0: 3340; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1] 3341; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3342 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 3343 ret <16 x i8> %res 3344} 3345 3346define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 3347; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128: 3348; X86: # %bb.0: 3349; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3350; X86-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] 3351; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3352; X86-NEXT: retl # encoding: [0xc3] 3353; 3354; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128: 3355; X64: # %bb.0: 3356; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3357; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] 3358; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3359; X64-NEXT: retq # encoding: [0xc3] 3360 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 3361 ret <16 x i8> %res 3362} 3363 3364declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 3365 3366define <32 x i8>@test_int_x86_avx512_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) { 3367; CHECK-LABEL: test_int_x86_avx512_pshuf_b_256: 3368; CHECK: # %bb.0: 3369; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1] 3370; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3371 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 3372 ret <32 x i8> %res 3373} 3374 3375define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 3376; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256: 3377; X86: # %bb.0: 3378; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 3379; X86-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] 3380; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3381; X86-NEXT: retl # encoding: [0xc3] 3382; 3383; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256: 3384; X64: # %bb.0: 3385; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3386; X64-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] 3387; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 3388; X64-NEXT: retq # encoding: [0xc3] 3389 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 3390 ret <32 x i8> %res 3391} 3392 3393declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8) 3394 3395define <8 x 
i16>@test_int_x86_avx512_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1) { 3396; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_128: 3397; CHECK: # %bb.0: 3398; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0] 3399; CHECK-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3400; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3401 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 3402 ret <8 x i16> %res 3403} 3404 3405define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 3406; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: 3407; X86: # %bb.0: 3408; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3409; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3410; X86-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] 3411; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3412; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3413; X86-NEXT: retl # encoding: [0xc3] 3414; 3415; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: 3416; X64: # %bb.0: 3417; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3418; X64-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] 3419; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3420; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3421; X64-NEXT: retq # encoding: [0xc3] 3422 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 3423 ret <8 x i16> %res 3424} 3425 3426define <8 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_128(<16 x i8> %x0, i8 %x2) { 3427; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128: 3428; X86: # %bb.0: 3429; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3430; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3431; X86-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] 3432; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3433; X86-NEXT: retl # encoding: [0xc3] 3434; 3435; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128: 3436; X64: # %bb.0: 3437; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3438; X64-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] 3439; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 3440; X64-NEXT: retq # encoding: [0xc3] 3441 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 3442 ret <8 x i16> %res 3443} 3444 3445declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16) 3446 3447define <16 x i16>@test_int_x86_avx512_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1) { 3448; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_256: 3449; CHECK: # %bb.0: 3450; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0] 3451; CHECK-NEXT: # ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3452; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3453 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 3454 ret <16 x i16> %res 3455} 3456 3457define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 3458; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: 3459; X86: # %bb.0: 3460; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3461; X86-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] 3462; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3463; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3464; X86-NEXT: retl # encoding: [0xc3] 3465; 3466; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: 3467; X64: # %bb.0: 3468; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3469; X64-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] 3470; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3471; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3472; X64-NEXT: retq # encoding: [0xc3] 3473 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 3474 ret <16 x i16> %res 3475} 3476 3477define <16 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_256(<16 x i8> %x0, i16 %x2) { 3478; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256: 3479; X86: # %bb.0: 3480; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3481; X86-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] 3482; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3483; X86-NEXT: retl # encoding: [0xc3] 3484; 3485; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256: 3486; X64: # %bb.0: 3487; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3488; X64-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] 3489; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 3490; X64-NEXT: retq # encoding: [0xc3] 3491 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 3492 ret <16 x i16> %res 3493} 3494 3495declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8) 3496 3497define <8 x i16>@test_int_x86_avx512_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1) { 3498; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_128: 3499; CHECK: # %bb.0: 3500; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0x79,0x20,0xc0] 3501; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3502 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 3503 ret <8 x i16> %res 3504} 3505 3506define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 3507; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: 3508; X86: # %bb.0: 3509; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3510; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3511; X86-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] 3512; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3513; X86-NEXT: retl # encoding: [0xc3] 3514; 3515; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: 3516; X64: # %bb.0: 3517; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3518; X64-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] 3519; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3520; X64-NEXT: retq # encoding: [0xc3] 3521 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 3522 ret <8 x i16> %res 3523} 3524 3525define <8 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_128(<16 x i8> %x0, i8 %x2) { 3526; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128: 3527; X86: # %bb.0: 3528; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3529; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3530; X86-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] 3531; X86-NEXT: retl # encoding: [0xc3] 3532; 3533; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128: 3534; X64: # %bb.0: 3535; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3536; X64-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] 3537; X64-NEXT: retq # encoding: [0xc3] 3538 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 3539 ret <8 x i16> %res 3540} 3541 3542declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16) 3543 3544define <16 x i16>@test_int_x86_avx512_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1) { 3545; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_256: 3546; CHECK: # %bb.0: 3547; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xc0] 3548; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3549 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 3550 ret <16 x i16> %res 3551} 3552 3553define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 3554; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: 3555; X86: # %bb.0: 3556; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3557; X86-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] 3558; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3559; X86-NEXT: retl # encoding: [0xc3] 3560; 3561; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: 3562; X64: # %bb.0: 3563; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3564; X64-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] 3565; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3566; X64-NEXT: retq # 
encoding: [0xc3] 3567 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 3568 ret <16 x i16> %res 3569} 3570 3571define <16 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_256(<16 x i8> %x0, i16 %x2) { 3572; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256: 3573; X86: # %bb.0: 3574; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3575; X86-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] 3576; X86-NEXT: retl # encoding: [0xc3] 3577; 3578; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256: 3579; X64: # %bb.0: 3580; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3581; X64-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] 3582; X64-NEXT: retq # encoding: [0xc3] 3583 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 3584 ret <16 x i16> %res 3585} 3586 3587declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8) 3588 3589define <2 x i64>@test_int_x86_avx512_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1) { 3590; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_128: 3591; CHECK: # %bb.0: 3592; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0] 3593; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3594 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1) 3595 ret <2 x i64> %res 3596} 3597 3598define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { 3599; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: 3600; X86: # %bb.0: 3601; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3602; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3603; X86-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] 3604; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3605; X86-NEXT: retl # encoding: [0xc3] 3606; 3607; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: 3608; X64: # %bb.0: 3609; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3610; X64-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] 3611; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3612; X64-NEXT: retq # encoding: [0xc3] 3613 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) 3614 ret <2 x i64> %res 3615} 3616 3617define <2 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_128(<4 x i32> %x0, i8 %x2) { 3618; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128: 3619; X86: # %bb.0: 3620; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3621; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3622; X86-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] 3623; X86-NEXT: retl # encoding: [0xc3] 3624; 3625; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128: 3626; X64: # %bb.0: 3627; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3628; X64-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] 3629; X64-NEXT: retq # encoding: [0xc3] 3630 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) 3631 ret <2 x i64> %res 3632} 3633 3634declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8) 3635 
define <4 x i64>@test_int_x86_avx512_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_256(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)

define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
; X64-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)

define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
;
X86-NEXT: retl # encoding: [0xc3] 3707; 3708; X64-LABEL: test_int_x86_avx512_cvtmask2b_256: 3709; X64: # %bb.0: 3710; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 3711; X64-NEXT: vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0] 3712; X64-NEXT: retq # encoding: [0xc3] 3713 %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0) 3714 ret <32 x i8> %res 3715} 3716 3717declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8) 3718 3719define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) { 3720; X86-LABEL: test_int_x86_avx512_cvtmask2w_128: 3721; X86: # %bb.0: 3722; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3723; X86-NEXT: kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0] 3724; X86-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0] 3725; X86-NEXT: retl # encoding: [0xc3] 3726; 3727; X64-LABEL: test_int_x86_avx512_cvtmask2w_128: 3728; X64: # %bb.0: 3729; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 3730; X64-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0] 3731; X64-NEXT: retq # encoding: [0xc3] 3732 %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0) 3733 ret <8 x i16> %res 3734} 3735 3736declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16) 3737 3738define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) { 3739; X86-LABEL: test_int_x86_avx512_cvtmask2w_256: 3740; X86: # %bb.0: 3741; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04] 3742; X86-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0] 3743; X86-NEXT: retl # encoding: [0xc3] 3744; 3745; X64-LABEL: test_int_x86_avx512_cvtmask2w_256: 3746; X64: # %bb.0: 3747; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 3748; X64-NEXT: vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0] 3749; X64-NEXT: retq # encoding: [0xc3] 3750 %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0) 3751 ret <16 x i16> %res 3752} 3753define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 3754; CHECK-LABEL: test_mask_packs_epi32_rr_128: 3755; CHECK: # %bb.0: 3756; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] 3757; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3758 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3759 ret <8 x i16> %res 3760} 3761 3762define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 3763; X86-LABEL: test_mask_packs_epi32_rrk_128: 3764; X86: # %bb.0: 3765; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3766; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3767; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 3768; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3769; X86-NEXT: retl # encoding: [0xc3] 3770; 3771; X64-LABEL: test_mask_packs_epi32_rrk_128: 3772; X64: # %bb.0: 3773; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3774; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 3775; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 3776; X64-NEXT: retq # encoding: [0xc3] 3777 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3778 ret <8 x i16> %res 
3779} 3780 3781define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 3782; X86-LABEL: test_mask_packs_epi32_rrkz_128: 3783; X86: # %bb.0: 3784; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3785; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3786; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 3787; X86-NEXT: retl # encoding: [0xc3] 3788; 3789; X64-LABEL: test_mask_packs_epi32_rrkz_128: 3790; X64: # %bb.0: 3791; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3792; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 3793; X64-NEXT: retq # encoding: [0xc3] 3794 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3795 ret <8 x i16> %res 3796} 3797 3798define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 3799; X86-LABEL: test_mask_packs_epi32_rm_128: 3800; X86: # %bb.0: 3801; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3802; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00] 3803; X86-NEXT: retl # encoding: [0xc3] 3804; 3805; X64-LABEL: test_mask_packs_epi32_rm_128: 3806; X64: # %bb.0: 3807; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07] 3808; X64-NEXT: retq # encoding: [0xc3] 3809 %b = load <4 x i32>, <4 x i32>* %ptr_b 3810 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3811 ret <8 x i16> %res 3812} 3813 3814define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3815; X86-LABEL: test_mask_packs_epi32_rmk_128: 3816; X86: # %bb.0: 3817; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3818; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3819; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3820; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08] 3821; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3822; X86-NEXT: retl # encoding: [0xc3] 3823; 3824; X64-LABEL: test_mask_packs_epi32_rmk_128: 3825; X64: # %bb.0: 3826; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3827; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] 3828; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3829; X64-NEXT: retq # encoding: [0xc3] 3830 %b = load <4 x i32>, <4 x i32>* %ptr_b 3831 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3832 ret <8 x i16> %res 3833} 3834 3835define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 3836; X86-LABEL: test_mask_packs_epi32_rmkz_128: 3837; X86: # %bb.0: 3838; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3839; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3840; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3841; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00] 3842; X86-NEXT: retl # encoding: [0xc3] 3843; 3844; X64-LABEL: test_mask_packs_epi32_rmkz_128: 3845; X64: # %bb.0: 3846; X64-NEXT: 
kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3847; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] 3848; X64-NEXT: retq # encoding: [0xc3] 3849 %b = load <4 x i32>, <4 x i32>* %ptr_b 3850 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3851 ret <8 x i16> %res 3852} 3853 3854define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 3855; X86-LABEL: test_mask_packs_epi32_rmb_128: 3856; X86: # %bb.0: 3857; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3858; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00] 3859; X86-NEXT: retl # encoding: [0xc3] 3860; 3861; X64-LABEL: test_mask_packs_epi32_rmb_128: 3862; X64: # %bb.0: 3863; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] 3864; X64-NEXT: retq # encoding: [0xc3] 3865 %q = load i32, i32* %ptr_b 3866 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3867 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3868 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 3869 ret <8 x i16> %res 3870} 3871 3872define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3873; X86-LABEL: test_mask_packs_epi32_rmbk_128: 3874; X86: # %bb.0: 3875; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3876; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3877; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3878; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08] 3879; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3880; X86-NEXT: retl # encoding: [0xc3] 3881; 3882; X64-LABEL: test_mask_packs_epi32_rmbk_128: 3883; X64: # %bb.0: 3884; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3885; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 3886; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 3887; X64-NEXT: retq # encoding: [0xc3] 3888 %q = load i32, i32* %ptr_b 3889 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3890 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3891 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3892 ret <8 x i16> %res 3893} 3894 3895define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3896; X86-LABEL: test_mask_packs_epi32_rmbkz_128: 3897; X86: # %bb.0: 3898; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3899; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 3900; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 3901; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00] 3902; X86-NEXT: retl # encoding: [0xc3] 3903; 3904; X64-LABEL: test_mask_packs_epi32_rmbkz_128: 3905; X64: # %bb.0: 3906; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3907; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 3908; X64-NEXT: retq # encoding: [0xc3] 3909 %q = load i32, i32* %ptr_b 3910 
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)

define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

3977define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3978; X86-LABEL: test_mask_packs_epi32_rmk_256: 3979; X86: # %bb.0: 3980; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3981; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3982; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08] 3983; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3984; X86-NEXT: retl # encoding: [0xc3] 3985; 3986; X64-LABEL: test_mask_packs_epi32_rmk_256: 3987; X64: # %bb.0: 3988; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3989; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 3990; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 3991; X64-NEXT: retq # encoding: [0xc3] 3992 %b = load <8 x i32>, <8 x i32>* %ptr_b 3993 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3994 ret <16 x i16> %res 3995} 3996 3997define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 3998; X86-LABEL: test_mask_packs_epi32_rmkz_256: 3999; X86: # %bb.0: 4000; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4001; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4002; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00] 4003; X86-NEXT: retl # encoding: [0xc3] 4004; 4005; X64-LABEL: test_mask_packs_epi32_rmkz_256: 4006; X64: # %bb.0: 4007; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4008; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 4009; X64-NEXT: retq # encoding: [0xc3] 4010 %b = load <8 x i32>, <8 x i32>* %ptr_b 4011 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4012 ret <16 x i16> %res 4013} 4014 4015define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4016; X86-LABEL: test_mask_packs_epi32_rmb_256: 4017; X86: # %bb.0: 4018; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4019; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00] 4020; X86-NEXT: retl # encoding: [0xc3] 4021; 4022; X64-LABEL: test_mask_packs_epi32_rmb_256: 4023; X64: # %bb.0: 4024; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 4025; X64-NEXT: retq # encoding: [0xc3] 4026 %q = load i32, i32* %ptr_b 4027 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4028 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4029 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 4030 ret <16 x i16> %res 4031} 4032 4033define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 4034; X86-LABEL: test_mask_packs_epi32_rmbk_256: 4035; X86: # %bb.0: 4036; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4037; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4038; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08] 4039; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO 
VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4040; X86-NEXT: retl # encoding: [0xc3] 4041; 4042; X64-LABEL: test_mask_packs_epi32_rmbk_256: 4043; X64: # %bb.0: 4044; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4045; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 4046; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4047; X64-NEXT: retq # encoding: [0xc3] 4048 %q = load i32, i32* %ptr_b 4049 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4050 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4051 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 4052 ret <16 x i16> %res 4053} 4054 4055define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 4056; X86-LABEL: test_mask_packs_epi32_rmbkz_256: 4057; X86: # %bb.0: 4058; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4059; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4060; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00] 4061; X86-NEXT: retl # encoding: [0xc3] 4062; 4063; X64-LABEL: test_mask_packs_epi32_rmbkz_256: 4064; X64: # %bb.0: 4065; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4066; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 4067; X64-NEXT: retq # encoding: [0xc3] 4068 %q = load i32, i32* %ptr_b 4069 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4070 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4071 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4072 ret <16 x i16> %res 4073} 4074 4075declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 4076 4077define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 4078; CHECK-LABEL: test_mask_packs_epi16_rr_128: 4079; CHECK: # %bb.0: 4080; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 4081; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4082 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4083 ret <16 x i8> %res 4084} 4085 4086define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 4087; X86-LABEL: test_mask_packs_epi16_rrk_128: 4088; X86: # %bb.0: 4089; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4090; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 4091; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4092; X86-NEXT: retl # encoding: [0xc3] 4093; 4094; X64-LABEL: test_mask_packs_epi16_rrk_128: 4095; X64: # %bb.0: 4096; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4097; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 4098; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4099; X64-NEXT: retq # encoding: [0xc3] 4100 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4101 ret <16 x i8> %res 4102} 4103 4104define <16 x i8> 
@test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 4105; X86-LABEL: test_mask_packs_epi16_rrkz_128: 4106; X86: # %bb.0: 4107; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4108; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 4109; X86-NEXT: retl # encoding: [0xc3] 4110; 4111; X64-LABEL: test_mask_packs_epi16_rrkz_128: 4112; X64: # %bb.0: 4113; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4114; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 4115; X64-NEXT: retq # encoding: [0xc3] 4116 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4117 ret <16 x i8> %res 4118} 4119 4120define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 4121; X86-LABEL: test_mask_packs_epi16_rm_128: 4122; X86: # %bb.0: 4123; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4124; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00] 4125; X86-NEXT: retl # encoding: [0xc3] 4126; 4127; X64-LABEL: test_mask_packs_epi16_rm_128: 4128; X64: # %bb.0: 4129; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07] 4130; X64-NEXT: retq # encoding: [0xc3] 4131 %b = load <8 x i16>, <8 x i16>* %ptr_b 4132 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4133 ret <16 x i8> %res 4134} 4135 4136define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 4137; X86-LABEL: test_mask_packs_epi16_rmk_128: 4138; X86: # %bb.0: 4139; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4140; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4141; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08] 4142; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4143; X86-NEXT: retl # encoding: [0xc3] 4144; 4145; X64-LABEL: test_mask_packs_epi16_rmk_128: 4146; X64: # %bb.0: 4147; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4148; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] 4149; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4150; X64-NEXT: retq # encoding: [0xc3] 4151 %b = load <8 x i16>, <8 x i16>* %ptr_b 4152 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4153 ret <16 x i8> %res 4154} 4155 4156define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 4157; X86-LABEL: test_mask_packs_epi16_rmkz_128: 4158; X86: # %bb.0: 4159; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4160; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4161; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00] 4162; X86-NEXT: retl # encoding: [0xc3] 4163; 4164; X64-LABEL: test_mask_packs_epi16_rmkz_128: 4165; X64: # %bb.0: 4166; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4167; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] 4168; X64-NEXT: retq # encoding: [0xc3] 4169 %b = load <8 x i16>, 
<8 x i16>* %ptr_b 4170 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4171 ret <16 x i8> %res 4172} 4173 4174declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 4175 4176define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 4177; CHECK-LABEL: test_mask_packs_epi16_rr_256: 4178; CHECK: # %bb.0: 4179; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] 4180; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4181 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4182 ret <32 x i8> %res 4183} 4184 4185define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 4186; X86-LABEL: test_mask_packs_epi16_rrk_256: 4187; X86: # %bb.0: 4188; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4189; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 4190; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4191; X86-NEXT: retl # encoding: [0xc3] 4192; 4193; X64-LABEL: test_mask_packs_epi16_rrk_256: 4194; X64: # %bb.0: 4195; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4196; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 4197; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4198; X64-NEXT: retq # encoding: [0xc3] 4199 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4200 ret <32 x i8> %res 4201} 4202 4203define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 4204; X86-LABEL: test_mask_packs_epi16_rrkz_256: 4205; X86: # %bb.0: 4206; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4207; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 4208; X86-NEXT: retl # encoding: [0xc3] 4209; 4210; X64-LABEL: test_mask_packs_epi16_rrkz_256: 4211; X64: # %bb.0: 4212; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4213; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 4214; X64-NEXT: retq # encoding: [0xc3] 4215 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4216 ret <32 x i8> %res 4217} 4218 4219define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 4220; X86-LABEL: test_mask_packs_epi16_rm_256: 4221; X86: # %bb.0: 4222; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4223; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00] 4224; X86-NEXT: retl # encoding: [0xc3] 4225; 4226; X64-LABEL: test_mask_packs_epi16_rm_256: 4227; X64: # %bb.0: 4228; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07] 4229; X64-NEXT: retq # encoding: [0xc3] 4230 %b = load <16 x i16>, <16 x i16>* %ptr_b 4231 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4232 ret <32 x i8> %res 4233} 4234 4235define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 
%mask) { 4236; X86-LABEL: test_mask_packs_epi16_rmk_256: 4237; X86: # %bb.0: 4238; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4239; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4240; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08] 4241; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4242; X86-NEXT: retl # encoding: [0xc3] 4243; 4244; X64-LABEL: test_mask_packs_epi16_rmk_256: 4245; X64: # %bb.0: 4246; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4247; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] 4248; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4249; X64-NEXT: retq # encoding: [0xc3] 4250 %b = load <16 x i16>, <16 x i16>* %ptr_b 4251 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4252 ret <32 x i8> %res 4253} 4254 4255define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 4256; X86-LABEL: test_mask_packs_epi16_rmkz_256: 4257; X86: # %bb.0: 4258; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4259; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4260; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00] 4261; X86-NEXT: retl # encoding: [0xc3] 4262; 4263; X64-LABEL: test_mask_packs_epi16_rmkz_256: 4264; X64: # %bb.0: 4265; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4266; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07] 4267; X64-NEXT: retq # encoding: [0xc3] 4268 %b = load <16 x i16>, <16 x i16>* %ptr_b 4269 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4270 ret <32 x i8> %res 4271} 4272 4273declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 4274 4275 4276define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 4277; CHECK-LABEL: test_mask_packus_epi32_rr_128: 4278; CHECK: # %bb.0: 4279; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] 4280; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4281 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 4282 ret <8 x i16> %res 4283} 4284 4285define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 4286; X86-LABEL: test_mask_packus_epi32_rrk_128: 4287; X86: # %bb.0: 4288; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4289; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4290; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 4291; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4292; X86-NEXT: retl # encoding: [0xc3] 4293; 4294; X64-LABEL: test_mask_packus_epi32_rrk_128: 4295; X64: # %bb.0: 4296; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4297; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 4298; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4299; X64-NEXT: retq # encoding: [0xc3] 4300 
%res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 4301 ret <8 x i16> %res 4302} 4303 4304define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 4305; X86-LABEL: test_mask_packus_epi32_rrkz_128: 4306; X86: # %bb.0: 4307; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4308; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4309; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 4310; X86-NEXT: retl # encoding: [0xc3] 4311; 4312; X64-LABEL: test_mask_packus_epi32_rrkz_128: 4313; X64: # %bb.0: 4314; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4315; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 4316; X64-NEXT: retq # encoding: [0xc3] 4317 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 4318 ret <8 x i16> %res 4319} 4320 4321define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 4322; X86-LABEL: test_mask_packus_epi32_rm_128: 4323; X86: # %bb.0: 4324; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4325; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00] 4326; X86-NEXT: retl # encoding: [0xc3] 4327; 4328; X64-LABEL: test_mask_packus_epi32_rm_128: 4329; X64: # %bb.0: 4330; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07] 4331; X64-NEXT: retq # encoding: [0xc3] 4332 %b = load <4 x i32>, <4 x i32>* %ptr_b 4333 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 4334 ret <8 x i16> %res 4335} 4336 4337define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 4338; X86-LABEL: test_mask_packus_epi32_rmk_128: 4339; X86: # %bb.0: 4340; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4341; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4342; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 4343; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08] 4344; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4345; X86-NEXT: retl # encoding: [0xc3] 4346; 4347; X64-LABEL: test_mask_packus_epi32_rmk_128: 4348; X64: # %bb.0: 4349; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4350; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] 4351; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4352; X64-NEXT: retq # encoding: [0xc3] 4353 %b = load <4 x i32>, <4 x i32>* %ptr_b 4354 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 4355 ret <8 x i16> %res 4356} 4357 4358define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 4359; X86-LABEL: test_mask_packus_epi32_rmkz_128: 4360; X86: # %bb.0: 4361; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4362; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4363; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 4364; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x7d,0x89,0x2b,0x00] 4365; X86-NEXT: retl # encoding: [0xc3] 4366; 4367; X64-LABEL: test_mask_packus_epi32_rmkz_128: 4368; X64: # %bb.0: 4369; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4370; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] 4371; X64-NEXT: retq # encoding: [0xc3] 4372 %b = load <4 x i32>, <4 x i32>* %ptr_b 4373 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 4374 ret <8 x i16> %res 4375} 4376 4377define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 4378; X86-LABEL: test_mask_packus_epi32_rmb_128: 4379; X86: # %bb.0: 4380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4381; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00] 4382; X86-NEXT: retl # encoding: [0xc3] 4383; 4384; X64-LABEL: test_mask_packus_epi32_rmb_128: 4385; X64: # %bb.0: 4386; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07] 4387; X64-NEXT: retq # encoding: [0xc3] 4388 %q = load i32, i32* %ptr_b 4389 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4390 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4391 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 4392 ret <8 x i16> %res 4393} 4394 4395define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 4396; X86-LABEL: test_mask_packus_epi32_rmbk_128: 4397; X86: # %bb.0: 4398; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4399; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4400; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 4401; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08] 4402; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4403; X86-NEXT: retl # encoding: [0xc3] 4404; 4405; X64-LABEL: test_mask_packus_epi32_rmbk_128: 4406; X64: # %bb.0: 4407; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4408; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] 4409; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4410; X64-NEXT: retq # encoding: [0xc3] 4411 %q = load i32, i32* %ptr_b 4412 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4413 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4414 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 4415 ret <8 x i16> %res 4416} 4417 4418define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4419; X86-LABEL: test_mask_packus_epi32_rmbkz_128: 4420; X86: # %bb.0: 4421; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4422; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4423; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 4424; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00] 4425; X86-NEXT: retl # encoding: [0xc3] 4426; 4427; X64-LABEL: test_mask_packus_epi32_rmbkz_128: 4428; X64: # %bb.0: 4429; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4430; X64-NEXT: 
vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] 4431; X64-NEXT: retq # encoding: [0xc3] 4432 %q = load i32, i32* %ptr_b 4433 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4434 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4435 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 4436 ret <8 x i16> %res 4437} 4438 4439declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 4440 4441define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 4442; CHECK-LABEL: test_mask_packus_epi32_rr_256: 4443; CHECK: # %bb.0: 4444; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] 4445; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4446 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 4447 ret <16 x i16> %res 4448} 4449 4450define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 4451; X86-LABEL: test_mask_packus_epi32_rrk_256: 4452; X86: # %bb.0: 4453; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4454; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 4455; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4456; X86-NEXT: retl # encoding: [0xc3] 4457; 4458; X64-LABEL: test_mask_packus_epi32_rrk_256: 4459; X64: # %bb.0: 4460; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4461; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 4462; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4463; X64-NEXT: retq # encoding: [0xc3] 4464 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 4465 ret <16 x i16> %res 4466} 4467 4468define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 4469; X86-LABEL: test_mask_packus_epi32_rrkz_256: 4470; X86: # %bb.0: 4471; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4472; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 4473; X86-NEXT: retl # encoding: [0xc3] 4474; 4475; X64-LABEL: test_mask_packus_epi32_rrkz_256: 4476; X64: # %bb.0: 4477; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4478; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 4479; X64-NEXT: retq # encoding: [0xc3] 4480 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4481 ret <16 x i16> %res 4482} 4483 4484define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 4485; X86-LABEL: test_mask_packus_epi32_rm_256: 4486; X86: # %bb.0: 4487; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4488; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00] 4489; X86-NEXT: retl # encoding: [0xc3] 4490; 4491; X64-LABEL: test_mask_packus_epi32_rm_256: 4492; X64: # %bb.0: 4493; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07] 4494; X64-NEXT: retq # encoding: [0xc3] 4495 %b = 
load <8 x i32>, <8 x i32>* %ptr_b 4496 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 4497 ret <16 x i16> %res 4498} 4499 4500define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 4501; X86-LABEL: test_mask_packus_epi32_rmk_256: 4502; X86: # %bb.0: 4503; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4504; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4505; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08] 4506; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4507; X86-NEXT: retl # encoding: [0xc3] 4508; 4509; X64-LABEL: test_mask_packus_epi32_rmk_256: 4510; X64: # %bb.0: 4511; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4512; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] 4513; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4514; X64-NEXT: retq # encoding: [0xc3] 4515 %b = load <8 x i32>, <8 x i32>* %ptr_b 4516 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 4517 ret <16 x i16> %res 4518} 4519 4520define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 4521; X86-LABEL: test_mask_packus_epi32_rmkz_256: 4522; X86: # %bb.0: 4523; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4524; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4525; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00] 4526; X86-NEXT: retl # encoding: [0xc3] 4527; 4528; X64-LABEL: test_mask_packus_epi32_rmkz_256: 4529; X64: # %bb.0: 4530; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4531; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] 4532; X64-NEXT: retq # encoding: [0xc3] 4533 %b = load <8 x i32>, <8 x i32>* %ptr_b 4534 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4535 ret <16 x i16> %res 4536} 4537 4538define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4539; X86-LABEL: test_mask_packus_epi32_rmb_256: 4540; X86: # %bb.0: 4541; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4542; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00] 4543; X86-NEXT: retl # encoding: [0xc3] 4544; 4545; X64-LABEL: test_mask_packus_epi32_rmb_256: 4546; X64: # %bb.0: 4547; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07] 4548; X64-NEXT: retq # encoding: [0xc3] 4549 %q = load i32, i32* %ptr_b 4550 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4551 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4552 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 4553 ret <16 x i16> %res 4554} 4555 4556define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 4557; X86-LABEL: test_mask_packus_epi32_rmbk_256: 4558; X86: # %bb.0: 4559; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4560; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4561; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08] 4562; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4563; X86-NEXT: retl # encoding: [0xc3] 4564; 4565; X64-LABEL: test_mask_packus_epi32_rmbk_256: 4566; X64: # %bb.0: 4567; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4568; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] 4569; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4570; X64-NEXT: retq # encoding: [0xc3] 4571 %q = load i32, i32* %ptr_b 4572 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4573 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4574 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 4575 ret <16 x i16> %res 4576} 4577 4578define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 4579; X86-LABEL: test_mask_packus_epi32_rmbkz_256: 4580; X86: # %bb.0: 4581; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4582; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4583; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00] 4584; X86-NEXT: retl # encoding: [0xc3] 4585; 4586; X64-LABEL: test_mask_packus_epi32_rmbkz_256: 4587; X64: # %bb.0: 4588; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4589; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] 4590; X64-NEXT: retq # encoding: [0xc3] 4591 %q = load i32, i32* %ptr_b 4592 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4593 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4594 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 4595 ret <16 x i16> %res 4596} 4597 4598declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 4599 4600define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 4601; CHECK-LABEL: test_mask_packus_epi16_rr_128: 4602; CHECK: # %bb.0: 4603; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 4604; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4605 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4606 ret <16 x i8> %res 4607} 4608 4609define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 4610; X86-LABEL: test_mask_packus_epi16_rrk_128: 4611; X86: # %bb.0: 4612; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4613; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 4614; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4615; X86-NEXT: retl # encoding: [0xc3] 4616; 4617; X64-LABEL: test_mask_packus_epi16_rrk_128: 4618; X64: # %bb.0: 4619; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4620; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 4621; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 
4622; X64-NEXT: retq # encoding: [0xc3] 4623 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4624 ret <16 x i8> %res 4625} 4626 4627define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 4628; X86-LABEL: test_mask_packus_epi16_rrkz_128: 4629; X86: # %bb.0: 4630; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 4631; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 4632; X86-NEXT: retl # encoding: [0xc3] 4633; 4634; X64-LABEL: test_mask_packus_epi16_rrkz_128: 4635; X64: # %bb.0: 4636; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4637; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 4638; X64-NEXT: retq # encoding: [0xc3] 4639 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4640 ret <16 x i8> %res 4641} 4642 4643define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 4644; X86-LABEL: test_mask_packus_epi16_rm_128: 4645; X86: # %bb.0: 4646; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4647; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00] 4648; X86-NEXT: retl # encoding: [0xc3] 4649; 4650; X64-LABEL: test_mask_packus_epi16_rm_128: 4651; X64: # %bb.0: 4652; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07] 4653; X64-NEXT: retq # encoding: [0xc3] 4654 %b = load <8 x i16>, <8 x i16>* %ptr_b 4655 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 4656 ret <16 x i8> %res 4657} 4658 4659define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 4660; X86-LABEL: test_mask_packus_epi16_rmk_128: 4661; X86: # %bb.0: 4662; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4663; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4664; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08] 4665; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4666; X86-NEXT: retl # encoding: [0xc3] 4667; 4668; X64-LABEL: test_mask_packus_epi16_rmk_128: 4669; X64: # %bb.0: 4670; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4671; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] 4672; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4673; X64-NEXT: retq # encoding: [0xc3] 4674 %b = load <8 x i16>, <8 x i16>* %ptr_b 4675 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 4676 ret <16 x i8> %res 4677} 4678 4679define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 4680; X86-LABEL: test_mask_packus_epi16_rmkz_128: 4681; X86: # %bb.0: 4682; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4683; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 4684; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00] 4685; X86-NEXT: retl # encoding: [0xc3] 4686; 4687; X64-LABEL: test_mask_packus_epi16_rmkz_128: 4688; X64: # %bb.0: 
4689; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4690; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] 4691; X64-NEXT: retq # encoding: [0xc3] 4692 %b = load <8 x i16>, <8 x i16>* %ptr_b 4693 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 4694 ret <16 x i8> %res 4695} 4696 4697declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 4698 4699define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 4700; CHECK-LABEL: test_mask_packus_epi16_rr_256: 4701; CHECK: # %bb.0: 4702; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] 4703; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4704 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4705 ret <32 x i8> %res 4706} 4707 4708define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 4709; X86-LABEL: test_mask_packus_epi16_rrk_256: 4710; X86: # %bb.0: 4711; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4712; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 4713; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4714; X86-NEXT: retl # encoding: [0xc3] 4715; 4716; X64-LABEL: test_mask_packus_epi16_rrk_256: 4717; X64: # %bb.0: 4718; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4719; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 4720; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4721; X64-NEXT: retq # encoding: [0xc3] 4722 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4723 ret <32 x i8> %res 4724} 4725 4726define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 4727; X86-LABEL: test_mask_packus_epi16_rrkz_256: 4728; X86: # %bb.0: 4729; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 4730; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 4731; X86-NEXT: retl # encoding: [0xc3] 4732; 4733; X64-LABEL: test_mask_packus_epi16_rrkz_256: 4734; X64: # %bb.0: 4735; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4736; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 4737; X64-NEXT: retq # encoding: [0xc3] 4738 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4739 ret <32 x i8> %res 4740} 4741 4742define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 4743; X86-LABEL: test_mask_packus_epi16_rm_256: 4744; X86: # %bb.0: 4745; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4746; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00] 4747; X86-NEXT: retl # encoding: [0xc3] 4748; 4749; X64-LABEL: test_mask_packus_epi16_rm_256: 4750; X64: # %bb.0: 4751; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07] 4752; X64-NEXT: retq # encoding: [0xc3] 4753 %b = load <16 x i16>, <16 x i16>* %ptr_b 4754 %res = call <32 x i8> 
@llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 4755 ret <32 x i8> %res 4756} 4757 4758define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 4759; X86-LABEL: test_mask_packus_epi16_rmk_256: 4760; X86: # %bb.0: 4761; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4762; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4763; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08] 4764; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4765; X86-NEXT: retl # encoding: [0xc3] 4766; 4767; X64-LABEL: test_mask_packus_epi16_rmk_256: 4768; X64: # %bb.0: 4769; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4770; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] 4771; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4772; X64-NEXT: retq # encoding: [0xc3] 4773 %b = load <16 x i16>, <16 x i16>* %ptr_b 4774 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 4775 ret <32 x i8> %res 4776} 4777 4778define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 4779; X86-LABEL: test_mask_packus_epi16_rmkz_256: 4780; X86: # %bb.0: 4781; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4782; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 4783; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00] 4784; X86-NEXT: retl # encoding: [0xc3] 4785; 4786; X64-LABEL: test_mask_packus_epi16_rmkz_256: 4787; X64: # %bb.0: 4788; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 4789; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07] 4790; X64-NEXT: retq # encoding: [0xc3] 4791 %b = load <16 x i16>, <16 x i16>* %ptr_b 4792 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 4793 ret <32 x i8> %res 4794} 4795 4796declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 4797 4798define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 4799; X86-LABEL: test_cmp_b_256: 4800; X86: # %bb.0: 4801; X86-NEXT: pushl %ebx # encoding: [0x53] 4802; X86-NEXT: .cfi_def_cfa_offset 8 4803; X86-NEXT: pushl %edi # encoding: [0x57] 4804; X86-NEXT: .cfi_def_cfa_offset 12 4805; X86-NEXT: pushl %esi # encoding: [0x56] 4806; X86-NEXT: .cfi_def_cfa_offset 16 4807; X86-NEXT: .cfi_offset %esi, -16 4808; X86-NEXT: .cfi_offset %edi, -12 4809; X86-NEXT: .cfi_offset %ebx, -8 4810; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4811; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4812; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] 4813; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4814; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] 4815; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4816; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4817; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4818; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: 
[0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05] 4819; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4820; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 4821; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4822; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4823; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4824; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02] 4825; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4826; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4827; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4828; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4829; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] 4830; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4831; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4832; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4833; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4834; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4835; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4836; X86-NEXT: popl %esi # encoding: [0x5e] 4837; X86-NEXT: .cfi_def_cfa_offset 12 4838; X86-NEXT: popl %edi # encoding: [0x5f] 4839; X86-NEXT: .cfi_def_cfa_offset 8 4840; X86-NEXT: popl %ebx # encoding: [0x5b] 4841; X86-NEXT: .cfi_def_cfa_offset 4 4842; X86-NEXT: retl # encoding: [0xc3] 4843; 4844; X64-LABEL: test_cmp_b_256: 4845; X64: # %bb.0: 4846; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 4847; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4848; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] 4849; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4850; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] 4851; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4852; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 4853; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4854; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05] 4855; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4856; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] 4857; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4858; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4859; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 4860; X64-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] 4861; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 4862; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 4863; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 4864; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4865; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4866; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4867; X64-NEXT: # xmm1 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4868; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 4869; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4870; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4871; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4872; X64-NEXT: retq # encoding: [0xc3] 4873 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 4874 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4875 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 4876 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4877 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 4878 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4879 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 4880 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4881 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 4882 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4883 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 4884 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4885 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 4886 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4887 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 4888 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4889 ret <8 x i32> %vec7 4890} 4891 4892define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 4893; X86-LABEL: test_mask_cmp_b_256: 4894; X86: # %bb.0: 4895; X86-NEXT: pushl %ebp # encoding: [0x55] 4896; X86-NEXT: .cfi_def_cfa_offset 8 4897; X86-NEXT: pushl %ebx # encoding: [0x53] 4898; X86-NEXT: .cfi_def_cfa_offset 12 4899; X86-NEXT: pushl %edi # encoding: [0x57] 4900; X86-NEXT: .cfi_def_cfa_offset 16 4901; X86-NEXT: pushl %esi # encoding: [0x56] 4902; X86-NEXT: .cfi_def_cfa_offset 20 4903; X86-NEXT: .cfi_offset %esi, -20 4904; X86-NEXT: .cfi_offset %edi, -16 4905; X86-NEXT: .cfi_offset %ebx, -12 4906; X86-NEXT: .cfi_offset %ebp, -8 4907; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 4908; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 4909; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4910; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4911; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] 4912; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4913; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] 4914; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4915; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4916; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 4917; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05] 4918; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 4919; X86-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] 4920; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8] 4921; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6e,0xc7] 4922; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01] 4923; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02] 4924; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] 4925; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] 4926; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1] 4927; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4928; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4929; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6] 4930; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4931; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 4932; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 4933; X86-NEXT: popl %esi # encoding: [0x5e] 4934; X86-NEXT: .cfi_def_cfa_offset 16 4935; X86-NEXT: popl %edi # encoding: [0x5f] 4936; X86-NEXT: .cfi_def_cfa_offset 12 4937; X86-NEXT: popl %ebx # encoding: [0x5b] 4938; X86-NEXT: .cfi_def_cfa_offset 8 4939; X86-NEXT: popl %ebp # encoding: [0x5d] 4940; X86-NEXT: .cfi_def_cfa_offset 4 4941; X86-NEXT: retl # encoding: [0xc3] 4942; 4943; X64-LABEL: test_mask_cmp_b_256: 4944; X64: # %bb.0: 4945; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 4946; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 4947; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 4948; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] 4949; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 4950; X64-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] 4951; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8] 4952; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 4953; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 4954; X64-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05] 4955; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 4956; X64-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] 4957; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 4958; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 4959; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 4960; X64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 4961; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 4962; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 4963; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 4964; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 4965; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4966; X64-NEXT: vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1] 4967; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 4968; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 4969; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x75,0x38,0xc0,0x01] 4970; X64-NEXT: retq # encoding: [0xc3] 4971 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 4972 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 4973 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 4974 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 4975 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 4976 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 4977 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 4978 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 4979 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask) 4980 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 4981 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 4982 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 4983 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 4984 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 4985 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 4986 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 4987 ret <8 x i32> %vec7 4988} 4989 4990declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 4991 4992define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 4993; X86-LABEL: test_ucmp_b_256: 4994; X86: # %bb.0: 4995; X86-NEXT: pushl %ebx # encoding: [0x53] 4996; X86-NEXT: .cfi_def_cfa_offset 8 4997; X86-NEXT: pushl %edi # encoding: [0x57] 4998; X86-NEXT: .cfi_def_cfa_offset 12 4999; X86-NEXT: pushl %esi # encoding: [0x56] 5000; X86-NEXT: .cfi_def_cfa_offset 16 5001; X86-NEXT: .cfi_offset %esi, -16 5002; X86-NEXT: .cfi_offset %edi, -12 5003; X86-NEXT: .cfi_offset %ebx, -8 5004; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 5005; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5006; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] 5007; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5008; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] 5009; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5010; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 5011; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 5012; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] 5013; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 5014; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] 5015; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 5016; X86-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 5017; X86-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 5018; X86-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02] 5019; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5020; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 5021; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 5022; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 5023; X86-NEXT: 
vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] 5024; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 5025; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 5026; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 5027; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 5028; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 5029; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 5030; X86-NEXT: popl %esi # encoding: [0x5e] 5031; X86-NEXT: .cfi_def_cfa_offset 12 5032; X86-NEXT: popl %edi # encoding: [0x5f] 5033; X86-NEXT: .cfi_def_cfa_offset 8 5034; X86-NEXT: popl %ebx # encoding: [0x5b] 5035; X86-NEXT: .cfi_def_cfa_offset 4 5036; X86-NEXT: retl # encoding: [0xc3] 5037; 5038; X64-LABEL: test_ucmp_b_256: 5039; X64: # %bb.0: 5040; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] 5041; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 5042; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] 5043; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5044; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] 5045; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5046; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 5047; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 5048; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] 5049; X64-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 5050; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] 5051; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5052; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 5053; X64-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] 5054; X64-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] 5055; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5056; X64-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08] 5057; X64-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3] 5058; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 5059; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 5060; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 5061; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 5062; X64-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] 5063; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 5064; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 5065; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 5066; X64-NEXT: retq # encoding: [0xc3] 5067 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 5068 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 5069 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 5070 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 5071 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, 
i32 -1) 5072 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 5073 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 5074 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 5075 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 5076 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 5077 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 5078 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 5079 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 5080 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 5081 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 5082 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 5083 ret <8 x i32> %vec7 5084} 5085 5086define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 5087; X86-LABEL: test_mask_ucmp_b_256: 5088; X86: # %bb.0: 5089; X86-NEXT: pushl %ebp # encoding: [0x55] 5090; X86-NEXT: .cfi_def_cfa_offset 8 5091; X86-NEXT: pushl %ebx # encoding: [0x53] 5092; X86-NEXT: .cfi_def_cfa_offset 12 5093; X86-NEXT: pushl %edi # encoding: [0x57] 5094; X86-NEXT: .cfi_def_cfa_offset 16 5095; X86-NEXT: pushl %esi # encoding: [0x56] 5096; X86-NEXT: .cfi_def_cfa_offset 20 5097; X86-NEXT: .cfi_offset %esi, -20 5098; X86-NEXT: .cfi_offset %edi, -16 5099; X86-NEXT: .cfi_offset %ebx, -12 5100; X86-NEXT: .cfi_offset %ebp, -8 5101; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 5102; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5103; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 5104; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5105; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] 5106; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5107; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] 5108; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 5109; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 5110; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 5111; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] 5112; X86-NEXT: kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8] 5113; X86-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] 5114; X86-NEXT: kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8] 5115; X86-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] 5116; X86-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01] 5117; X86-NEXT: vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02] 5118; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] 5119; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] 5120; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1] 5121; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 5122; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 5123; X86-NEXT: vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6] 5124; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression 
encoding: [0xc5,0xf1,0x6c,0xca] 5125; X86-NEXT: # xmm1 = xmm1[0],xmm2[0] 5126; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 5127; X86-NEXT: popl %esi # encoding: [0x5e] 5128; X86-NEXT: .cfi_def_cfa_offset 16 5129; X86-NEXT: popl %edi # encoding: [0x5f] 5130; X86-NEXT: .cfi_def_cfa_offset 12 5131; X86-NEXT: popl %ebx # encoding: [0x5b] 5132; X86-NEXT: .cfi_def_cfa_offset 8 5133; X86-NEXT: popl %ebp # encoding: [0x5d] 5134; X86-NEXT: .cfi_def_cfa_offset 4 5135; X86-NEXT: retl # encoding: [0xc3] 5136; 5137; X64-LABEL: test_mask_ucmp_b_256: 5138; X64: # %bb.0: 5139; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5140; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] 5141; X64-NEXT: kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0] 5142; X64-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] 5143; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5144; X64-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] 5145; X64-NEXT: kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8] 5146; X64-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 5147; X64-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 5148; X64-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] 5149; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5150; X64-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] 5151; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5152; X64-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] 5153; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 5154; X64-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] 5155; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 5156; X64-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] 5157; X64-NEXT: vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] 5158; X64-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] 5159; X64-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 5160; X64-NEXT: vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1] 5161; X64-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] 5162; X64-NEXT: # xmm1 = xmm1[0],xmm2[0] 5163; X64-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] 5164; X64-NEXT: retq # encoding: [0xc3] 5165 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 5166 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 5167 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 5168 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 5169 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 5170 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 5171 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 5172 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 5173 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, 
i32 %mask) 5174 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 5175 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 5176 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 5177 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 5178 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 5179 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 5180 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 5181 ret <8 x i32> %vec7 5182} 5183 5184declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 5185 5186define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 5187; CHECK-LABEL: test_cmp_w_256: 5188; CHECK: # %bb.0: 5189; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 5190; CHECK-NEXT: vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8] 5191; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02] 5192; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04] 5193; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05] 5194; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9] 5195; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5196; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5197; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5198; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5199; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5200; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5201; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5202; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5203; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5204; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5205; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5206; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5207; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5208; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 5209; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 5210; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5211; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5212 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 5213 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5214 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 5215 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5216 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 5217 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5218 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 5219 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5220 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 5221 %vec4 = 
insertelement <8 x i16> %vec3, i16 %res4, i32 4 5222 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 5223 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5224 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 5225 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5226 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 5227 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5228 ret <8 x i16> %vec7 5229} 5230 5231define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 5232; X86-LABEL: test_mask_cmp_w_256: 5233; X86: # %bb.0: 5234; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5235; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5236; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 5237; X86-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0] 5238; X86-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] 5239; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 5240; X86-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05] 5241; X86-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9] 5242; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5243; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 5244; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] 5245; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5246; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5247; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5248; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5249; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5250; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5251; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5252; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5253; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5254; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5255; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5256; X86-NEXT: retl # encoding: [0xc3] 5257; 5258; X64-LABEL: test_mask_cmp_w_256: 5259; X64: # %bb.0: 5260; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5261; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 5262; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0] 5263; X64-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] 5264; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 5265; X64-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05] 5266; X64-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9] 5267; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5268; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5269; X64-NEXT: vmovd %ecx, %xmm0 # 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5270; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5271; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5272; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5273; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5274; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5275; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5276; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5277; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5278; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5279; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5280; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5281; X64-NEXT: retq # encoding: [0xc3] 5282 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 5283 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5284 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 5285 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5286 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 5287 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5288 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask) 5289 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5290 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 5291 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5292 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 5293 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5294 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 5295 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5296 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 5297 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5298 ret <8 x i16> %vec7 5299} 5300 5301declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 5302 5303define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 5304; CHECK-LABEL: test_ucmp_w_256: 5305; CHECK: # %bb.0: 5306; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] 5307; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01] 5308; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02] 5309; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04] 5310; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05] 5311; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06] 5312; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5313; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5314; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5315; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xc4,0xc0,0x01] 5316; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5317; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5318; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5319; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5320; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5321; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5322; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5323; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5324; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5325; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 5326; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 5327; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5328; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5329 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 5330 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5331 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 5332 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5333 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 5334 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5335 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 5336 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5337 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 5338 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5339 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 5340 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5341 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 5342 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5343 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 5344 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5345 ret <8 x i16> %vec7 5346} 5347 5348define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 5349; X86-LABEL: test_mask_ucmp_w_256: 5350; X86: # %bb.0: 5351; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5352; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5353; X86-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 5354; X86-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] 5355; X86-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] 5356; X86-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 5357; X86-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05] 5358; X86-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06] 5359; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5360; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 5361; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] 5362; X86-NEXT: 
vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5363; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5364; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5365; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5366; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5367; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5368; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5369; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5370; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5371; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5372; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5373; X86-NEXT: retl # encoding: [0xc3] 5374; 5375; X64-LABEL: test_mask_ucmp_w_256: 5376; X64: # %bb.0: 5377; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5378; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] 5379; X64-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] 5380; X64-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] 5381; X64-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04] 5382; X64-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05] 5383; X64-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06] 5384; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5385; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5386; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5387; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5388; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5389; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5390; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5391; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5392; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5393; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5394; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5395; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5396; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5397; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 5398; X64-NEXT: retq # encoding: [0xc3] 5399 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 5400 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5401 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 5402 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5403 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 5404 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5405 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, 
i32 3, i16 %mask) 5406 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5407 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 5408 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5409 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 5410 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5411 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 5412 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5413 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 5414 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5415 ret <8 x i16> %vec7 5416} 5417 5418declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 5419 5420define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 5421; CHECK-LABEL: test_cmp_b_128: 5422; CHECK: # %bb.0: 5423; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 5424; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8] 5425; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02] 5426; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04] 5427; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05] 5428; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9] 5429; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5430; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5431; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5432; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5433; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5434; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5435; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5436; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5437; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5438; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5439; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5440; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5441; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5442; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 5443; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 5444; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5445 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 5446 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5447 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1) 5448 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5449 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 5450 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5451 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 5452 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5453 
%res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 5454 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5455 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 5456 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5457 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 5458 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5459 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 5460 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5461 ret <8 x i16> %vec7 5462} 5463 5464define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 5465; X86-LABEL: test_mask_cmp_b_128: 5466; X86: # %bb.0: 5467; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5468; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5469; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5470; X86-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0] 5471; X86-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] 5472; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5473; X86-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05] 5474; X86-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9] 5475; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5476; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 5477; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] 5478; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01] 5479; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5480; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5481; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5482; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5483; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5484; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5485; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5486; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5487; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5488; X86-NEXT: retl # encoding: [0xc3] 5489; 5490; X64-LABEL: test_mask_cmp_b_128: 5491; X64: # %bb.0: 5492; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5493; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5494; X64-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0] 5495; X64-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] 5496; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5497; X64-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05] 5498; X64-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9] 5499; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5500; X64-NEXT: kmovw %k0, %ecx # encoding: 
[0xc5,0xf8,0x93,0xc8] 5501; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5502; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5503; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5504; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5505; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5506; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5507; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5508; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5509; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5510; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5511; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5512; X64-NEXT: retq # encoding: [0xc3] 5513 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 5514 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5515 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 5516 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5517 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 5518 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5519 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 5520 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5521 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 5522 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5523 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 5524 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5525 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 5526 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5527 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 5528 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5529 ret <8 x i16> %vec7 5530} 5531 5532declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 5533 5534define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 5535; CHECK-LABEL: test_ucmp_b_128: 5536; CHECK: # %bb.0: 5537; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 5538; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01] 5539; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02] 5540; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04] 5541; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05] 5542; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06] 5543; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5544; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5545; CHECK-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5546; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xc4,0xc0,0x01] 5547; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5548; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5549; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5550; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5551; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5552; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5553; CHECK-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5554; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5555; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 5556; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80] 5557; CHECK-NEXT: # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 5558; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5559 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 5560 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5561 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1) 5562 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5563 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 5564 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5565 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 5566 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5567 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 5568 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5569 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 5570 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5571 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 5572 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5573 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 5574 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5575 ret <8 x i16> %vec7 5576} 5577 5578define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 5579; X86-LABEL: test_mask_ucmp_b_128: 5580; X86: # %bb.0: 5581; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5582; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5583; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5584; X86-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] 5585; X86-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] 5586; X86-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5587; X86-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05] 5588; X86-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06] 5589; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5590; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] 5591; X86-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] 5592; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xc4,0xc1,0x01] 5593; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5594; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02] 5595; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5596; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 5597; X86-NEXT: kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd] 5598; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05] 5599; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5600; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06] 5601; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 5602; X86-NEXT: retl # encoding: [0xc3] 5603; 5604; X64-LABEL: test_mask_ucmp_b_128: 5605; X64: # %bb.0: 5606; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5607; X64-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 5608; X64-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] 5609; X64-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] 5610; X64-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04] 5611; X64-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05] 5612; X64-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06] 5613; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5614; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] 5615; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5616; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 5617; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5618; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 5619; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5620; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 5621; X64-NEXT: kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5] 5622; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 5623; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5624; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 5625; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 5626; X64-NEXT: retq # encoding: [0xc3] 5627 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 5628 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 5629 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 5630 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 5631 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 5632 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 5633 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 5634 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 5635 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 5636 
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 5637 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 5638 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 5639 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 5640 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 5641 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 5642 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 5643 ret <8 x i16> %vec7 5644} 5645 5646declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 5647 5648define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 5649; CHECK-LABEL: test_cmp_w_128: 5650; CHECK: # %bb.0: 5651; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 5652; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5653; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0] 5654; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02] 5655; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04] 5656; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05] 5657; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1] 5658; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5659; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 5660; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 5661; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 5662; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5663; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 5664; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5665; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 5666; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5667; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 5668; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5669; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 5670; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 5671; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 5672; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5673 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 5674 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5675 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 5676 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5677 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 5678 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5679 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 5680 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5681 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 5682 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5683 %res5 = call 
i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 5684 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5685 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 5686 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5687 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 5688 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5689 ret <8 x i8> %vec7 5690} 5691 5692define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 5693; X86-LABEL: test_mask_cmp_w_128: 5694; X86: # %bb.0: 5695; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5696; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5697; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5698; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5699; X86-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0] 5700; X86-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02] 5701; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04] 5702; X86-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05] 5703; X86-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9] 5704; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5705; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] 5706; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5707; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] 5708; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5709; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] 5710; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5711; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] 5712; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5713; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] 5714; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5715; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] 5716; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 5717; X86-NEXT: retl # encoding: [0xc3] 5718; 5719; X64-LABEL: test_mask_cmp_w_128: 5720; X64: # %bb.0: 5721; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5722; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5723; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5724; X64-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0] 5725; X64-NEXT: vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02] 5726; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04] 5727; X64-NEXT: vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05] 5728; X64-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9] 5729; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5730; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 5731; X64-NEXT: vmovd %eax, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 5732; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 5733; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5734; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 5735; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5736; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 5737; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5738; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 5739; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5740; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 5741; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] 5742; X64-NEXT: retq # encoding: [0xc3] 5743 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 5744 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5745 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 5746 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5747 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 5748 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5749 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 5750 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5751 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 5752 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5753 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 5754 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5755 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 5756 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5757 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 5758 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5759 ret <8 x i8> %vec7 5760} 5761 5762declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 5763 5764define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 5765; CHECK-LABEL: test_ucmp_w_128: 5766; CHECK: # %bb.0: 5767; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 5768; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5769; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01] 5770; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02] 5771; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04] 5772; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05] 5773; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06] 5774; CHECK-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5775; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 5776; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 5777; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x20,0xc1,0x01] 5778; CHECK-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5779; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 5780; CHECK-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5781; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 5782; CHECK-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5783; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 5784; CHECK-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5785; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 5786; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] 5787; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 5788; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5789 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 5790 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5791 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 5792 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5793 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 5794 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5795 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 5796 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 5797 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 5798 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5799 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 5800 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5801 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 5802 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5803 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 5804 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5805 ret <8 x i8> %vec7 5806} 5807 5808define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 5809; X86-LABEL: test_mask_ucmp_w_128: 5810; X86: # %bb.0: 5811; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5812; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 5813; X86-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5814; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5815; X86-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01] 5816; X86-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02] 5817; X86-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04] 5818; X86-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05] 5819; X86-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06] 5820; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 5821; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] 5822; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 5823; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x20,0xc2,0x01] 5824; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 5825; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] 5826; X86-NEXT: kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb] 5827; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] 5828; X86-NEXT: kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc] 5829; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] 5830; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 5831; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] 5832; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 5833; X86-NEXT: retl # encoding: [0xc3] 5834; 5835; X64-LABEL: test_mask_ucmp_w_128: 5836; X64: # %bb.0: 5837; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5838; X64-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 5839; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 5840; X64-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01] 5841; X64-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02] 5842; X64-NEXT: vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04] 5843; X64-NEXT: vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05] 5844; X64-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06] 5845; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 5846; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] 5847; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 5848; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] 5849; X64-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] 5850; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 5851; X64-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 5852; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 5853; X64-NEXT: kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4] 5854; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 5855; X64-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 5856; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 5857; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] 5858; X64-NEXT: retq # encoding: [0xc3] 5859 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 5860 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 5861 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 5862 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 5863 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 5864 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 5865 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 5866 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 
5867 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 5868 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 5869 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 5870 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 5871 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 5872 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 5873 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 5874 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 5875 ret <8 x i8> %vec7 5876} 5877 5878declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 5879 5880define <16 x i8>@mm_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) { 5881; CHECK-LABEL: mm_avg_epu8: 5882; CHECK: # %bb.0: 5883; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1] 5884; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5885 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 5886 ret <16 x i8> %res 5887} 5888 5889define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 5890; X86-LABEL: mm_mask_avg_epu8: 5891; X86: # %bb.0: 5892; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5893; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 5894; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5895; X86-NEXT: retl # encoding: [0xc3] 5896; 5897; X64-LABEL: mm_mask_avg_epu8: 5898; X64: # %bb.0: 5899; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5900; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 5901; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 5902; X64-NEXT: retq # encoding: [0xc3] 5903 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 5904 ret <16 x i8> %res 5905} 5906 5907declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) 5908 5909define <16 x i8>@test_int_x86_avx512_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1) { 5910; CHECK-LABEL: test_int_x86_avx512_pabs_b_128: 5911; CHECK: # %bb.0: 5912; CHECK-NEXT: vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] 5913; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5914 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 5915 ret <16 x i8> %res 5916} 5917 5918define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5919; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128: 5920; X86: # %bb.0: 5921; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 5922; X86-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] 5923; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 5924; X86-NEXT: retl # encoding: [0xc3] 5925; 5926; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128: 5927; X64: # %bb.0: 5928; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5929; X64-NEXT: vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] 5930; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 5931; X64-NEXT: retq # encoding: 
[0xc3] 5932 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5933 ret <16 x i8> %res 5934} 5935 5936declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 5937 5938define <32 x i8>@mm256_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 5939; CHECK-LABEL: mm256_avg_epu8: 5940; CHECK: # %bb.0: 5941; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1] 5942; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5943 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 5944 ret <32 x i8> %res 5945} 5946 5947define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 5948; X86-LABEL: mm256_mask_avg_epu8: 5949; X86: # %bb.0: 5950; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 5951; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 5952; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5953; X86-NEXT: retl # encoding: [0xc3] 5954; 5955; X64-LABEL: mm256_mask_avg_epu8: 5956; X64: # %bb.0: 5957; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5958; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 5959; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 5960; X64-NEXT: retq # encoding: [0xc3] 5961 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 5962 ret <32 x i8> %res 5963} 5964 5965declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) 5966 5967define <32 x i8>@test_int_x86_avx512_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1) { 5968; CHECK-LABEL: test_int_x86_avx512_pabs_b_256: 5969; CHECK: # %bb.0: 5970; CHECK-NEXT: vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] 5971; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5972 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) 5973 ret <32 x i8> %res 5974} 5975 5976define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5977; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256: 5978; X86: # %bb.0: 5979; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 5980; X86-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] 5981; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5982; X86-NEXT: retl # encoding: [0xc3] 5983; 5984; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256: 5985; X64: # %bb.0: 5986; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 5987; X64-NEXT: vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] 5988; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 5989; X64-NEXT: retq # encoding: [0xc3] 5990 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5991 ret <32 x i8> %res 5992} 5993 5994declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 5995 5996define <8 x i16>@mm_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 5997; CHECK-LABEL: mm_avg_epu16: 5998; CHECK: # %bb.0: 5999; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1] 6000; CHECK-NEXT: ret{{[l|q]}} 
# encoding: [0xc3] 6001 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6002 ret <8 x i16> %res 6003} 6004 6005define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6006; X86-LABEL: mm_mask_avg_epu16: 6007; X86: # %bb.0: 6008; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6009; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6010; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 6011; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6012; X86-NEXT: retl # encoding: [0xc3] 6013; 6014; X64-LABEL: mm_mask_avg_epu16: 6015; X64: # %bb.0: 6016; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6017; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 6018; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6019; X64-NEXT: retq # encoding: [0xc3] 6020 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6021 ret <8 x i16> %res 6022} 6023 6024declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) 6025 6026define <8 x i16>@test_int_x86_avx512_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1) { 6027; CHECK-LABEL: test_int_x86_avx512_pabs_w_128: 6028; CHECK: # %bb.0: 6029; CHECK-NEXT: vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] 6030; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6031 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 6032 ret <8 x i16> %res 6033} 6034 6035define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 6036; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128: 6037; X86: # %bb.0: 6038; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6039; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6040; X86-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] 6041; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6042; X86-NEXT: retl # encoding: [0xc3] 6043; 6044; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128: 6045; X64: # %bb.0: 6046; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6047; X64-NEXT: vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] 6048; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6049; X64-NEXT: retq # encoding: [0xc3] 6050 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 6051 ret <8 x i16> %res 6052} 6053 6054declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6055 6056define <16 x i16>@mm256_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6057; CHECK-LABEL: mm256_avg_epu16: 6058; CHECK: # %bb.0: 6059; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1] 6060; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6061 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6062 ret <16 x i16> %res 6063} 6064 6065define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6066; X86-LABEL: mm256_mask_avg_epu16: 6067; X86: # %bb.0: 6068; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf8,0x90,0x4c,0x24,0x04] 6069; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 6070; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6071; X86-NEXT: retl # encoding: [0xc3] 6072; 6073; X64-LABEL: mm256_mask_avg_epu16: 6074; X64: # %bb.0: 6075; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6076; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 6077; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6078; X64-NEXT: retq # encoding: [0xc3] 6079 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6080 ret <16 x i16> %res 6081} 6082 6083declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) 6084 6085define <16 x i16>@test_int_x86_avx512_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1) { 6086; CHECK-LABEL: test_int_x86_avx512_pabs_w_256: 6087; CHECK: # %bb.0: 6088; CHECK-NEXT: vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] 6089; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6090 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) 6091 ret <16 x i16> %res 6092} 6093 6094define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 6095; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256: 6096; X86: # %bb.0: 6097; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6098; X86-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] 6099; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6100; X86-NEXT: retl # encoding: [0xc3] 6101; 6102; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256: 6103; X64: # %bb.0: 6104; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6105; X64-NEXT: vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] 6106; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6107; X64-NEXT: retq # encoding: [0xc3] 6108 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 6109 ret <16 x i16> %res 6110} 6111 6112declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6113 6114declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16) 6115 6116define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 6117; X86-LABEL: test_int_x86_avx512_ptestm_b_128: 6118; X86: # %bb.0: 6119; X86-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] 6120; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6121; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 6122; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 6123; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6124; X86-NEXT: # kill: def $ax killed $ax killed $eax 6125; X86-NEXT: retl # encoding: [0xc3] 6126; 6127; X64-LABEL: test_int_x86_avx512_ptestm_b_128: 6128; X64: # %bb.0: 6129; X64-NEXT: vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] 6130; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6131; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6132; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6133; X64-NEXT: # kill: def $ax killed $ax killed $eax 6134; X64-NEXT: retq # encoding: [0xc3] 6135 %res = call i16 
@llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 6136 %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 6137 %res2 = add i16 %res, %res1 6138 ret i16 %res2 6139} 6140 6141declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32) 6142 6143define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 6144; X86-LABEL: test_int_x86_avx512_ptestm_b_256: 6145; X86: # %bb.0: 6146; X86-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] 6147; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6148; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 6149; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 6150; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6151; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6152; X86-NEXT: retl # encoding: [0xc3] 6153; 6154; X64-LABEL: test_int_x86_avx512_ptestm_b_256: 6155; X64: # %bb.0: 6156; X64-NEXT: vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] 6157; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6158; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6159; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6160; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6161; X64-NEXT: retq # encoding: [0xc3] 6162 %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 6163 %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 6164 %res2 = add i32 %res, %res1 6165 ret i32 %res2 6166} 6167 6168declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8) 6169 6170define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 6171; X86-LABEL: test_int_x86_avx512_ptestm_w_128: 6172; X86: # %bb.0: 6173; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] 6174; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6175; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 6176; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 6177; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 6178; X86-NEXT: retl # encoding: [0xc3] 6179; 6180; X64-LABEL: test_int_x86_avx512_ptestm_w_128: 6181; X64: # %bb.0: 6182; X64-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] 6183; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6184; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 6185; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 6186; X64-NEXT: # kill: def $al killed $al killed $eax 6187; X64-NEXT: retq # encoding: [0xc3] 6188 %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 6189 %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 6190 %res2 = add i8 %res, %res1 6191 ret i8 %res2 6192} 6193 6194declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16) 6195 6196define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 6197; X86-LABEL: test_int_x86_avx512_ptestm_w_256: 6198; X86: # %bb.0: 6199; X86-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] 6200; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6201; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 6202; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 6203; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6204; X86-NEXT: # kill: def $ax killed $ax killed $eax 6205; X86-NEXT: vzeroupper # 
encoding: [0xc5,0xf8,0x77] 6206; X86-NEXT: retl # encoding: [0xc3] 6207; 6208; X64-LABEL: test_int_x86_avx512_ptestm_w_256: 6209; X64: # %bb.0: 6210; X64-NEXT: vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] 6211; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6212; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6213; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6214; X64-NEXT: # kill: def $ax killed $ax killed $eax 6215; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6216; X64-NEXT: retq # encoding: [0xc3] 6217 %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 6218 %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 6219 %res2 = add i16 %res, %res1 6220 ret i16 %res2 6221} 6222 6223declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16) 6224 6225define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 6226; X86-LABEL: test_int_x86_avx512_ptestnm_b_128: 6227; X86: # %bb.0: 6228; X86-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] 6229; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6230; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 6231; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 6232; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6233; X86-NEXT: # kill: def $ax killed $ax killed $eax 6234; X86-NEXT: retl # encoding: [0xc3] 6235; 6236; X64-LABEL: test_int_x86_avx512_ptestnm_b_128: 6237; X64: # %bb.0: 6238; X64-NEXT: vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] 6239; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6240; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6241; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6242; X64-NEXT: # kill: def $ax killed $ax killed $eax 6243; X64-NEXT: retq # encoding: [0xc3] 6244 %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 6245 %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 6246 %res2 = add i16 %res, %res1 6247 ret i16 %res2 6248} 6249 6250declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32) 6251 6252define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 6253; X86-LABEL: test_int_x86_avx512_ptestnm_b_256: 6254; X86: # %bb.0: 6255; X86-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] 6256; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6257; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 6258; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 6259; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6260; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6261; X86-NEXT: retl # encoding: [0xc3] 6262; 6263; X64-LABEL: test_int_x86_avx512_ptestnm_b_256: 6264; X64: # %bb.0: 6265; X64-NEXT: vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] 6266; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6267; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6268; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6269; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6270; X64-NEXT: retq # encoding: [0xc3] 6271 %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 6272 %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 6273 %res2 = add i32 %res, %res1 6274 ret i32 %res2 6275} 6276 
6277declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2) 6278 6279define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 6280; X86-LABEL: test_int_x86_avx512_ptestnm_w_128: 6281; X86: # %bb.0: 6282; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] 6283; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6284; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 6285; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] 6286; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 6287; X86-NEXT: retl # encoding: [0xc3] 6288; 6289; X64-LABEL: test_int_x86_avx512_ptestnm_w_128: 6290; X64: # %bb.0: 6291; X64-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] 6292; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6293; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7] 6294; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8] 6295; X64-NEXT: # kill: def $al killed $al killed $eax 6296; X64-NEXT: retq # encoding: [0xc3] 6297 %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 6298 %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 6299 %res2 = add i8 %res, %res1 6300 ret i8 %res2 6301} 6302 6303declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2) 6304 6305define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 6306; X86-LABEL: test_int_x86_avx512_ptestnm_w_256: 6307; X86: # %bb.0: 6308; X86-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] 6309; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 6310; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 6311; X86-NEXT: andw %cx, %ax # encoding: [0x66,0x21,0xc8] 6312; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 6313; X86-NEXT: # kill: def $ax killed $ax killed $eax 6314; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6315; X86-NEXT: retl # encoding: [0xc3] 6316; 6317; X64-LABEL: test_int_x86_avx512_ptestnm_w_256: 6318; X64: # %bb.0: 6319; X64-NEXT: vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] 6320; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6321; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 6322; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 6323; X64-NEXT: # kill: def $ax killed $ax killed $eax 6324; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6325; X64-NEXT: retq # encoding: [0xc3] 6326 %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 6327 %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 6328 %res2 = add i16 %res, %res1 6329 ret i16 %res2 6330} 6331 6332declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) 6333 6334define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { 6335; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: 6336; CHECK: # %bb.0: 6337; CHECK-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0] 6338; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 6339; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6340 %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) 6341 ret i16 %res 6342} 6343 6344declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) 6345 6346define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { 6347; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: 6348; CHECK: # %bb.0: 6349; CHECK-NEXT: vpmovmskb %ymm0, %eax # encoding: 
[0xc5,0xfd,0xd7,0xc0] 6350; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6351; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6352 %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) 6353 ret i32 %res 6354} 6355 6356declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) 6357 6358define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { 6359; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: 6360; CHECK: # %bb.0: 6361; CHECK-NEXT: vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] 6362; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6363; CHECK-NEXT: # kill: def $al killed $al killed $eax 6364; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6365 %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) 6366 ret i8 %res 6367} 6368 6369declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) 6370 6371define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { 6372; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: 6373; CHECK: # %bb.0: 6374; CHECK-NEXT: vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] 6375; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 6376; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 6377; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 6378; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6379 %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) 6380 ret i16 %res 6381} 6382 6383declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6384 6385define <8 x i16>@test_int_x86_avx512_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6386; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_128: 6387; CHECK: # %bb.0: 6388; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] 6389; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6390 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6391 ret <8 x i16> %res 6392} 6393 6394define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6395; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 6396; X86: # %bb.0: 6397; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6398; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6399; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 6400; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6401; X86-NEXT: retl # encoding: [0xc3] 6402; 6403; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 6404; X64: # %bb.0: 6405; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6406; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 6407; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6408; X64-NEXT: retq # encoding: [0xc3] 6409 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6410 ret <8 x i16> %res 6411} 6412 6413declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6414 6415define <16 x i16>@test_int_x86_avx512_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6416; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_256: 6417; CHECK: # %bb.0: 6418; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1] 6419; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6420 %res = call <16 x i16> 
@llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6421 ret <16 x i16> %res 6422} 6423 6424define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6425; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 6426; X86: # %bb.0: 6427; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6428; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 6429; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6430; X86-NEXT: retl # encoding: [0xc3] 6431; 6432; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 6433; X64: # %bb.0: 6434; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6435; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 6436; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6437; X64-NEXT: retq # encoding: [0xc3] 6438 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6439 ret <16 x i16> %res 6440} 6441 6442declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6443 6444define <8 x i16>@test_int_x86_avx512_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6445; CHECK-LABEL: test_int_x86_avx512_pmulh_w_128: 6446; CHECK: # %bb.0: 6447; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] 6448; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6449 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6450 ret <8 x i16> %res 6451} 6452 6453define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6454; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 6455; X86: # %bb.0: 6456; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6457; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6458; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 6459; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6460; X86-NEXT: retl # encoding: [0xc3] 6461; 6462; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 6463; X64: # %bb.0: 6464; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6465; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 6466; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6467; X64-NEXT: retq # encoding: [0xc3] 6468 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6469 ret <8 x i16> %res 6470} 6471 6472declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6473 6474define <16 x i16>@test_int_x86_avx512_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6475; CHECK-LABEL: test_int_x86_avx512_pmulh_w_256: 6476; CHECK: # %bb.0: 6477; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1] 6478; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6479 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6480 ret <16 x i16> %res 6481} 6482 6483define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6484; 
X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 6485; X86: # %bb.0: 6486; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6487; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 6488; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6489; X86-NEXT: retl # encoding: [0xc3] 6490; 6491; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 6492; X64: # %bb.0: 6493; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6494; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 6495; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6496; X64-NEXT: retq # encoding: [0xc3] 6497 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6498 ret <16 x i16> %res 6499} 6500 6501declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6502 6503define <8 x i16>@test_int_x86_avx512_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6504; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_128: 6505; CHECK: # %bb.0: 6506; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] 6507; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6508 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6509 ret <8 x i16> %res 6510} 6511 6512define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6513; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 6514; X86: # %bb.0: 6515; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6516; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6517; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 6518; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6519; X86-NEXT: retl # encoding: [0xc3] 6520; 6521; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 6522; X64: # %bb.0: 6523; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6524; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 6525; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6526; X64-NEXT: retq # encoding: [0xc3] 6527 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6528 ret <8 x i16> %res 6529} 6530 6531declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6532 6533define <16 x i16>@test_int_x86_avx512_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6534; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_256: 6535; CHECK: # %bb.0: 6536; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1] 6537; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6538 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6539 ret <16 x i16> %res 6540} 6541 6542define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6543; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 6544; X86: # %bb.0: 6545; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6546; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 
{%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 6547; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6548; X86-NEXT: retl # encoding: [0xc3] 6549; 6550; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 6551; X64: # %bb.0: 6552; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6553; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 6554; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6555; X64-NEXT: retq # encoding: [0xc3] 6556 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6557 ret <16 x i16> %res 6558} 6559 6560declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8) 6561 6562define <8 x i16>@test_int_x86_avx512_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2) { 6563; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_128: 6564; CHECK: # %bb.0: 6565; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] 6566; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6567 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) 6568 ret <8 x i16> %res 6569} 6570 6571define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { 6572; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 6573; X86: # %bb.0: 6574; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6575; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6576; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 6577; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6578; X86-NEXT: retl # encoding: [0xc3] 6579; 6580; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 6581; X64: # %bb.0: 6582; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6583; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 6584; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6585; X64-NEXT: retq # encoding: [0xc3] 6586 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) 6587 ret <8 x i16> %res 6588} 6589 6590declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16) 6591 6592define <16 x i16>@test_int_x86_avx512_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2) { 6593; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_256: 6594; CHECK: # %bb.0: 6595; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1] 6596; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6597 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) 6598 ret <16 x i16> %res 6599} 6600 6601define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { 6602; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 6603; X86: # %bb.0: 6604; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6605; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 6606; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6607; X86-NEXT: retl # encoding: 
[0xc3] 6608; 6609; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 6610; X64: # %bb.0: 6611; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6612; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 6613; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6614; X64-NEXT: retq # encoding: [0xc3] 6615 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) 6616 ret <16 x i16> %res 6617} 6618 6619declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) 6620 6621define <4 x i32>@test_int_x86_avx512_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2) { 6622; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_128: 6623; CHECK: # %bb.0: 6624; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] 6625; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6626 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) 6627 ret <4 x i32> %res 6628} 6629 6630define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { 6631; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 6632; X86: # %bb.0: 6633; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6634; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6635; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 6636; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6637; X86-NEXT: retl # encoding: [0xc3] 6638; 6639; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 6640; X64: # %bb.0: 6641; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6642; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 6643; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6644; X64-NEXT: retq # encoding: [0xc3] 6645 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) 6646 ret <4 x i32> %res 6647} 6648 6649declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8) 6650 6651define <8 x i32>@test_int_x86_avx512_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2) { 6652; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_256: 6653; CHECK: # %bb.0: 6654; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1] 6655; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6656 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) 6657 ret <8 x i32> %res 6658} 6659 6660define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 6661; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6662; X86: # %bb.0: 6663; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6664; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6665; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6666; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6667; X86-NEXT: retl # encoding: [0xc3] 6668; 6669; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 6670; X64: # %bb.0: 6671; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6672; X64-NEXT: vpmaddwd 
%ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 6673; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6674; X64-NEXT: retq # encoding: [0xc3] 6675 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) 6676 ret <8 x i32> %res 6677} 6678 6679declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6680 6681define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6682; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128: 6683; CHECK: # %bb.0: 6684; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0] 6685; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6686 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6687 ret <8 x i16> %res 6688} 6689 6690define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6691; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6692; X86: # %bb.0: 6693; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6694; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6695; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6696; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6697; X86-NEXT: retl # encoding: [0xc3] 6698; 6699; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 6700; X64: # %bb.0: 6701; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6702; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 6703; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 6704; X64-NEXT: retq # encoding: [0xc3] 6705 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6706 ret <8 x i16> %res 6707} 6708 6709define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 6710; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128: 6711; X86: # %bb.0: 6712; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6713; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6714; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6715; X86-NEXT: retl # encoding: [0xc3] 6716; 6717; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128: 6718; X64: # %bb.0: 6719; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6720; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] 6721; X64-NEXT: retq # encoding: [0xc3] 6722 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 6723 ret <8 x i16> %res 6724} 6725 6726declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6727 6728define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6729; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256: 6730; CHECK: # %bb.0: 6731; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0] 6732; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6733 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6734 ret <16 x i16> %res 6735} 6736 6737define <16 x 
i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6738; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6739; X86: # %bb.0: 6740; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6741; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6742; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6743; X86-NEXT: retl # encoding: [0xc3] 6744; 6745; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 6746; X64: # %bb.0: 6747; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6748; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 6749; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 6750; X64-NEXT: retq # encoding: [0xc3] 6751 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6752 ret <16 x i16> %res 6753} 6754 6755define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 6756; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256: 6757; X86: # %bb.0: 6758; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6759; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6760; X86-NEXT: retl # encoding: [0xc3] 6761; 6762; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256: 6763; X64: # %bb.0: 6764; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6765; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] 6766; X64-NEXT: retq # encoding: [0xc3] 6767 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 6768 ret <16 x i16> %res 6769} 6770 6771declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6772 6773define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6774; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128: 6775; CHECK: # %bb.0: 6776; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2] 6777; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6778 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6779 ret <8 x i16> %res 6780} 6781 6782define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6783; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 6784; X86: # %bb.0: 6785; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6786; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6787; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6788; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6789; X86-NEXT: retl # encoding: [0xc3] 6790; 6791; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 6792; X64: # %bb.0: 6793; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6794; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 6795; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6796; X64-NEXT: retq # encoding: [0xc3] 6797 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x 
i16> %x2, i8 %x3) 6798 ret <8 x i16> %res 6799} 6800 6801declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6802 6803define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6804; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6805; X86: # %bb.0: 6806; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6807; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6808; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2] 6809; X86-NEXT: retl # encoding: [0xc3] 6810; 6811; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 6812; X64: # %bb.0: 6813; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6814; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2] 6815; X64-NEXT: retq # encoding: [0xc3] 6816 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6817 ret <8 x i16> %res 6818} 6819 6820declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6821 6822define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6823; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256: 6824; CHECK: # %bb.0: 6825; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2] 6826; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6827 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6828 ret <16 x i16> %res 6829} 6830 6831define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6832; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6833; X86: # %bb.0: 6834; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6835; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6836; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6837; X86-NEXT: retl # encoding: [0xc3] 6838; 6839; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 6840; X64: # %bb.0: 6841; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6842; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 6843; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6844; X64-NEXT: retq # encoding: [0xc3] 6845 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6846 ret <16 x i16> %res 6847} 6848 6849declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6850 6851define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6852; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6853; X86: # %bb.0: 6854; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6855; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2] 6856; X86-NEXT: retl # encoding: [0xc3] 6857; 6858; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 6859; X64: # %bb.0: 6860; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6861; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: 
[0x62,0xf2,0xf5,0xa9,0x75,0xc2] 6862; X64-NEXT: retq # encoding: [0xc3] 6863 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 6864 ret <16 x i16> %res 6865} 6866 6867declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 6868 6869define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 6870; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128: 6871; CHECK: # %bb.0: 6872; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2] 6873; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6874 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 6875 ret <8 x i16> %res 6876} 6877 6878define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 6879; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6880; X86: # %bb.0: 6881; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6882; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6883; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6884; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6885; X86-NEXT: retl # encoding: [0xc3] 6886; 6887; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 6888; X64: # %bb.0: 6889; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6890; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 6891; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 6892; X64-NEXT: retq # encoding: [0xc3] 6893 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 6894 ret <8 x i16> %res 6895} 6896 6897declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 6898 6899define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 6900; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256: 6901; CHECK: # %bb.0: 6902; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2] 6903; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6904 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 6905 ret <16 x i16> %res 6906} 6907 6908define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 6909; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6910; X86: # %bb.0: 6911; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6912; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6913; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6914; X86-NEXT: retl # encoding: [0xc3] 6915; 6916; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 6917; X64: # %bb.0: 6918; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6919; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 6920; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 6921; X64-NEXT: retq # encoding: [0xc3] 6922 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> 
%x2, i16 %x3) 6923 ret <16 x i16> %res 6924} 6925 6926declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) 6927 6928define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 6929; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6930; X86: # %bb.0: 6931; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 6932; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 6933; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 6934; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 6935; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] 6936; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 6937; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6938; X86-NEXT: retl # encoding: [0xc3] 6939; 6940; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 6941; X64: # %bb.0: 6942; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6943; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 6944; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 6945; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] 6946; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 6947; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 6948; X64-NEXT: retq # encoding: [0xc3] 6949 %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) 6950 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4) 6951 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1) 6952 %res3 = add <8 x i16> %res, %res1 6953 %res4 = add <8 x i16> %res2, %res3 6954 ret <8 x i16> %res4 6955} 6956 6957declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16) 6958 6959define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { 6960; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 6961; X86: # %bb.0: 6962; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 6963; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 6964; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 6965; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] 6966; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 6967; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6968; X86-NEXT: retl # encoding: [0xc3] 6969; 6970; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 6971; X64: # %bb.0: 6972; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 6973; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 6974; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 
6975; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] 6976; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 6977; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 6978; X64-NEXT: retq # encoding: [0xc3] 6979 %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) 6980 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4) 6981 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1) 6982 %res3 = add <16 x i16> %res, %res1 6983 %res4 = add <16 x i16> %res3, %res2 6984 ret <16 x i16> %res4 6985} 6986 6987define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 6988; CHECK-LABEL: test_mask_adds_epu16_rr_128: 6989; CHECK: # %bb.0: 6990; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] 6991; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 6992 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 6993 ret <8 x i16> %res 6994} 6995 6996define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 6997; X86-LABEL: test_mask_adds_epu16_rrk_128: 6998; X86: # %bb.0: 6999; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7000; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7001; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] 7002; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7003; X86-NEXT: retl # encoding: [0xc3] 7004; 7005; X64-LABEL: test_mask_adds_epu16_rrk_128: 7006; X64: # %bb.0: 7007; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7008; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] 7009; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7010; X64-NEXT: retq # encoding: [0xc3] 7011 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7012 ret <8 x i16> %res 7013} 7014 7015define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 7016; X86-LABEL: test_mask_adds_epu16_rrkz_128: 7017; X86: # %bb.0: 7018; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7019; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7020; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] 7021; X86-NEXT: retl # encoding: [0xc3] 7022; 7023; X64-LABEL: test_mask_adds_epu16_rrkz_128: 7024; X64: # %bb.0: 7025; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7026; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] 7027; X64-NEXT: retq # encoding: [0xc3] 7028 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7029 ret <8 x i16> %res 7030} 7031 7032define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 7033; X86-LABEL: test_mask_adds_epu16_rm_128: 7034; X86: # %bb.0: 7035; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7036; X86-NEXT: vpaddusw (%eax), 
%xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00] 7037; X86-NEXT: retl # encoding: [0xc3] 7038; 7039; X64-LABEL: test_mask_adds_epu16_rm_128: 7040; X64: # %bb.0: 7041; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07] 7042; X64-NEXT: retq # encoding: [0xc3] 7043 %b = load <8 x i16>, <8 x i16>* %ptr_b 7044 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7045 ret <8 x i16> %res 7046} 7047 7048define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 7049; X86-LABEL: test_mask_adds_epu16_rmk_128: 7050; X86: # %bb.0: 7051; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7052; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7053; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7054; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08] 7055; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7056; X86-NEXT: retl # encoding: [0xc3] 7057; 7058; X64-LABEL: test_mask_adds_epu16_rmk_128: 7059; X64: # %bb.0: 7060; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7061; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] 7062; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7063; X64-NEXT: retq # encoding: [0xc3] 7064 %b = load <8 x i16>, <8 x i16>* %ptr_b 7065 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7066 ret <8 x i16> %res 7067} 7068 7069define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 7070; X86-LABEL: test_mask_adds_epu16_rmkz_128: 7071; X86: # %bb.0: 7072; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7073; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7074; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7075; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00] 7076; X86-NEXT: retl # encoding: [0xc3] 7077; 7078; X64-LABEL: test_mask_adds_epu16_rmkz_128: 7079; X64: # %bb.0: 7080; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7081; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07] 7082; X64-NEXT: retq # encoding: [0xc3] 7083 %b = load <8 x i16>, <8 x i16>* %ptr_b 7084 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7085 ret <8 x i16> %res 7086} 7087 7088declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 7089 7090define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 7091; CHECK-LABEL: test_mask_adds_epu16_rr_256: 7092; CHECK: # %bb.0: 7093; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1] 7094; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7095 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7096 ret <16 x i16> %res 7097} 7098 7099define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 7100; X86-LABEL: test_mask_adds_epu16_rrk_256: 7101; X86: # %bb.0: 7102; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7103; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 7104; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7105; X86-NEXT: retl # encoding: [0xc3] 7106; 7107; X64-LABEL: test_mask_adds_epu16_rrk_256: 7108; X64: # %bb.0: 7109; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7110; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 7111; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7112; X64-NEXT: retq # encoding: [0xc3] 7113 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7114 ret <16 x i16> %res 7115} 7116 7117define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 7118; X86-LABEL: test_mask_adds_epu16_rrkz_256: 7119; X86: # %bb.0: 7120; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7121; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 7122; X86-NEXT: retl # encoding: [0xc3] 7123; 7124; X64-LABEL: test_mask_adds_epu16_rrkz_256: 7125; X64: # %bb.0: 7126; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7127; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 7128; X64-NEXT: retq # encoding: [0xc3] 7129 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7130 ret <16 x i16> %res 7131} 7132 7133define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 7134; X86-LABEL: test_mask_adds_epu16_rm_256: 7135; X86: # %bb.0: 7136; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7137; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00] 7138; X86-NEXT: retl # encoding: [0xc3] 7139; 7140; X64-LABEL: test_mask_adds_epu16_rm_256: 7141; X64: # %bb.0: 7142; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07] 7143; X64-NEXT: retq # encoding: [0xc3] 7144 %b = load <16 x i16>, <16 x i16>* %ptr_b 7145 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7146 ret <16 x i16> %res 7147} 7148 7149define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 7150; X86-LABEL: test_mask_adds_epu16_rmk_256: 7151; X86: # %bb.0: 7152; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7153; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7154; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08] 7155; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7156; X86-NEXT: retl # encoding: [0xc3] 7157; 7158; X64-LABEL: test_mask_adds_epu16_rmk_256: 7159; X64: # %bb.0: 7160; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7161; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] 7162; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7163; X64-NEXT: retq # encoding: [0xc3] 7164 %b = load <16 x i16>, <16 x i16>* %ptr_b 7165 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x 
i16> %b, <16 x i16> %passThru, i16 %mask) 7166 ret <16 x i16> %res 7167} 7168 7169define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 7170; X86-LABEL: test_mask_adds_epu16_rmkz_256: 7171; X86: # %bb.0: 7172; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7173; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7174; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00] 7175; X86-NEXT: retl # encoding: [0xc3] 7176; 7177; X64-LABEL: test_mask_adds_epu16_rmkz_256: 7178; X64: # %bb.0: 7179; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7180; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07] 7181; X64-NEXT: retq # encoding: [0xc3] 7182 %b = load <16 x i16>, <16 x i16>* %ptr_b 7183 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7184 ret <16 x i16> %res 7185} 7186 7187declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 7188 7189define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 7190; CHECK-LABEL: test_mask_subs_epu16_rr_128: 7191; CHECK: # %bb.0: 7192; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] 7193; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7194 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7195 ret <8 x i16> %res 7196} 7197 7198define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 7199; X86-LABEL: test_mask_subs_epu16_rrk_128: 7200; X86: # %bb.0: 7201; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7202; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7203; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] 7204; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7205; X86-NEXT: retl # encoding: [0xc3] 7206; 7207; X64-LABEL: test_mask_subs_epu16_rrk_128: 7208; X64: # %bb.0: 7209; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7210; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] 7211; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7212; X64-NEXT: retq # encoding: [0xc3] 7213 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7214 ret <8 x i16> %res 7215} 7216 7217define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 7218; X86-LABEL: test_mask_subs_epu16_rrkz_128: 7219; X86: # %bb.0: 7220; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7221; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7222; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] 7223; X86-NEXT: retl # encoding: [0xc3] 7224; 7225; X64-LABEL: test_mask_subs_epu16_rrkz_128: 7226; X64: # %bb.0: 7227; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7228; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] 7229; X64-NEXT: retq # encoding: [0xc3] 7230 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 
%mask) 7231 ret <8 x i16> %res 7232} 7233 7234define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 7235; X86-LABEL: test_mask_subs_epu16_rm_128: 7236; X86: # %bb.0: 7237; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7238; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00] 7239; X86-NEXT: retl # encoding: [0xc3] 7240; 7241; X64-LABEL: test_mask_subs_epu16_rm_128: 7242; X64: # %bb.0: 7243; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07] 7244; X64-NEXT: retq # encoding: [0xc3] 7245 %b = load <8 x i16>, <8 x i16>* %ptr_b 7246 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7247 ret <8 x i16> %res 7248} 7249 7250define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 7251; X86-LABEL: test_mask_subs_epu16_rmk_128: 7252; X86: # %bb.0: 7253; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7254; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7255; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7256; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08] 7257; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7258; X86-NEXT: retl # encoding: [0xc3] 7259; 7260; X64-LABEL: test_mask_subs_epu16_rmk_128: 7261; X64: # %bb.0: 7262; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7263; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] 7264; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7265; X64-NEXT: retq # encoding: [0xc3] 7266 %b = load <8 x i16>, <8 x i16>* %ptr_b 7267 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7268 ret <8 x i16> %res 7269} 7270 7271define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 7272; X86-LABEL: test_mask_subs_epu16_rmkz_128: 7273; X86: # %bb.0: 7274; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7275; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7276; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7277; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00] 7278; X86-NEXT: retl # encoding: [0xc3] 7279; 7280; X64-LABEL: test_mask_subs_epu16_rmkz_128: 7281; X64: # %bb.0: 7282; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7283; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07] 7284; X64-NEXT: retq # encoding: [0xc3] 7285 %b = load <8 x i16>, <8 x i16>* %ptr_b 7286 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7287 ret <8 x i16> %res 7288} 7289 7290declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 7291 7292define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 7293; CHECK-LABEL: test_mask_subs_epu16_rr_256: 7294; CHECK: # %bb.0: 7295; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1] 7296; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7297 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, 
<16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7298 ret <16 x i16> %res 7299} 7300 7301define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 7302; X86-LABEL: test_mask_subs_epu16_rrk_256: 7303; X86: # %bb.0: 7304; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7305; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] 7306; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7307; X86-NEXT: retl # encoding: [0xc3] 7308; 7309; X64-LABEL: test_mask_subs_epu16_rrk_256: 7310; X64: # %bb.0: 7311; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7312; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] 7313; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7314; X64-NEXT: retq # encoding: [0xc3] 7315 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7316 ret <16 x i16> %res 7317} 7318 7319define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 7320; X86-LABEL: test_mask_subs_epu16_rrkz_256: 7321; X86: # %bb.0: 7322; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7323; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] 7324; X86-NEXT: retl # encoding: [0xc3] 7325; 7326; X64-LABEL: test_mask_subs_epu16_rrkz_256: 7327; X64: # %bb.0: 7328; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7329; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] 7330; X64-NEXT: retq # encoding: [0xc3] 7331 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7332 ret <16 x i16> %res 7333} 7334 7335define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 7336; X86-LABEL: test_mask_subs_epu16_rm_256: 7337; X86: # %bb.0: 7338; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7339; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00] 7340; X86-NEXT: retl # encoding: [0xc3] 7341; 7342; X64-LABEL: test_mask_subs_epu16_rm_256: 7343; X64: # %bb.0: 7344; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07] 7345; X64-NEXT: retq # encoding: [0xc3] 7346 %b = load <16 x i16>, <16 x i16>* %ptr_b 7347 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7348 ret <16 x i16> %res 7349} 7350 7351define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 7352; X86-LABEL: test_mask_subs_epu16_rmk_256: 7353; X86: # %bb.0: 7354; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7355; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7356; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08] 7357; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7358; X86-NEXT: retl # encoding: [0xc3] 7359; 7360; X64-LABEL: test_mask_subs_epu16_rmk_256: 7361; X64: # %bb.0: 7362; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7363; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: 
[0x62,0xf1,0x7d,0x29,0xd9,0x0f] 7364; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7365; X64-NEXT: retq # encoding: [0xc3] 7366 %b = load <16 x i16>, <16 x i16>* %ptr_b 7367 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7368 ret <16 x i16> %res 7369} 7370 7371define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 7372; X86-LABEL: test_mask_subs_epu16_rmkz_256: 7373; X86: # %bb.0: 7374; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7375; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7376; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00] 7377; X86-NEXT: retl # encoding: [0xc3] 7378; 7379; X64-LABEL: test_mask_subs_epu16_rmkz_256: 7380; X64: # %bb.0: 7381; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7382; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07] 7383; X64-NEXT: retq # encoding: [0xc3] 7384 %b = load <16 x i16>, <16 x i16>* %ptr_b 7385 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7386 ret <16 x i16> %res 7387} 7388 7389declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 7390 7391define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 7392; CHECK-LABEL: test_mask_adds_epu8_rr_128: 7393; CHECK: # %bb.0: 7394; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1] 7395; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7396 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 7397 ret <16 x i8> %res 7398} 7399 7400define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 7401; X86-LABEL: test_mask_adds_epu8_rrk_128: 7402; X86: # %bb.0: 7403; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7404; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] 7405; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7406; X86-NEXT: retl # encoding: [0xc3] 7407; 7408; X64-LABEL: test_mask_adds_epu8_rrk_128: 7409; X64: # %bb.0: 7410; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7411; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] 7412; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7413; X64-NEXT: retq # encoding: [0xc3] 7414 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 7415 ret <16 x i8> %res 7416} 7417 7418define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 7419; X86-LABEL: test_mask_adds_epu8_rrkz_128: 7420; X86: # %bb.0: 7421; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7422; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1] 7423; X86-NEXT: retl # encoding: [0xc3] 7424; 7425; X64-LABEL: test_mask_adds_epu8_rrkz_128: 7426; X64: # %bb.0: 7427; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7428; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0x89,0xdc,0xc1] 7429; X64-NEXT: retq # encoding: [0xc3] 7430 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 7431 ret <16 x i8> %res 7432} 7433 7434define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 7435; X86-LABEL: test_mask_adds_epu8_rm_128: 7436; X86: # %bb.0: 7437; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7438; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00] 7439; X86-NEXT: retl # encoding: [0xc3] 7440; 7441; X64-LABEL: test_mask_adds_epu8_rm_128: 7442; X64: # %bb.0: 7443; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07] 7444; X64-NEXT: retq # encoding: [0xc3] 7445 %b = load <16 x i8>, <16 x i8>* %ptr_b 7446 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 7447 ret <16 x i8> %res 7448} 7449 7450define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 7451; X86-LABEL: test_mask_adds_epu8_rmk_128: 7452; X86: # %bb.0: 7453; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7454; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7455; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08] 7456; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7457; X86-NEXT: retl # encoding: [0xc3] 7458; 7459; X64-LABEL: test_mask_adds_epu8_rmk_128: 7460; X64: # %bb.0: 7461; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7462; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] 7463; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7464; X64-NEXT: retq # encoding: [0xc3] 7465 %b = load <16 x i8>, <16 x i8>* %ptr_b 7466 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 7467 ret <16 x i8> %res 7468} 7469 7470define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 7471; X86-LABEL: test_mask_adds_epu8_rmkz_128: 7472; X86: # %bb.0: 7473; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7474; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7475; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00] 7476; X86-NEXT: retl # encoding: [0xc3] 7477; 7478; X64-LABEL: test_mask_adds_epu8_rmkz_128: 7479; X64: # %bb.0: 7480; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7481; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07] 7482; X64-NEXT: retq # encoding: [0xc3] 7483 %b = load <16 x i8>, <16 x i8>* %ptr_b 7484 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 7485 ret <16 x i8> %res 7486} 7487 7488declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 7489 7490define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 7491; CHECK-LABEL: test_mask_adds_epu8_rr_256: 7492; CHECK: # %bb.0: 7493; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1] 7494; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7495 %res = call <32 
x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 7496 ret <32 x i8> %res 7497} 7498 7499define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 7500; X86-LABEL: test_mask_adds_epu8_rrk_256: 7501; X86: # %bb.0: 7502; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 7503; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] 7504; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7505; X86-NEXT: retl # encoding: [0xc3] 7506; 7507; X64-LABEL: test_mask_adds_epu8_rrk_256: 7508; X64: # %bb.0: 7509; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7510; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] 7511; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7512; X64-NEXT: retq # encoding: [0xc3] 7513 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 7514 ret <32 x i8> %res 7515} 7516 7517define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 7518; X86-LABEL: test_mask_adds_epu8_rrkz_256: 7519; X86: # %bb.0: 7520; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 7521; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1] 7522; X86-NEXT: retl # encoding: [0xc3] 7523; 7524; X64-LABEL: test_mask_adds_epu8_rrkz_256: 7525; X64: # %bb.0: 7526; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7527; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1] 7528; X64-NEXT: retq # encoding: [0xc3] 7529 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 7530 ret <32 x i8> %res 7531} 7532 7533define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 7534; X86-LABEL: test_mask_adds_epu8_rm_256: 7535; X86: # %bb.0: 7536; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7537; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00] 7538; X86-NEXT: retl # encoding: [0xc3] 7539; 7540; X64-LABEL: test_mask_adds_epu8_rm_256: 7541; X64: # %bb.0: 7542; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07] 7543; X64-NEXT: retq # encoding: [0xc3] 7544 %b = load <32 x i8>, <32 x i8>* %ptr_b 7545 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 7546 ret <32 x i8> %res 7547} 7548 7549define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 7550; X86-LABEL: test_mask_adds_epu8_rmk_256: 7551; X86: # %bb.0: 7552; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7553; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 7554; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08] 7555; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7556; X86-NEXT: retl # encoding: [0xc3] 7557; 7558; X64-LABEL: test_mask_adds_epu8_rmk_256: 7559; X64: # %bb.0: 7560; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7561; X64-NEXT: vpaddusb (%rdi), %ymm0, 
%ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f] 7562; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7563; X64-NEXT: retq # encoding: [0xc3] 7564 %b = load <32 x i8>, <32 x i8>* %ptr_b 7565 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 7566 ret <32 x i8> %res 7567} 7568 7569define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 7570; X86-LABEL: test_mask_adds_epu8_rmkz_256: 7571; X86: # %bb.0: 7572; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7573; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 7574; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00] 7575; X86-NEXT: retl # encoding: [0xc3] 7576; 7577; X64-LABEL: test_mask_adds_epu8_rmkz_256: 7578; X64: # %bb.0: 7579; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7580; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07] 7581; X64-NEXT: retq # encoding: [0xc3] 7582 %b = load <32 x i8>, <32 x i8>* %ptr_b 7583 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 7584 ret <32 x i8> %res 7585} 7586 7587declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 7588 7589define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 7590; CHECK-LABEL: test_mask_subs_epu8_rr_128: 7591; CHECK: # %bb.0: 7592; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] 7593; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7594 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 7595 ret <16 x i8> %res 7596} 7597 7598define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 7599; X86-LABEL: test_mask_subs_epu8_rrk_128: 7600; X86: # %bb.0: 7601; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7602; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] 7603; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7604; X86-NEXT: retl # encoding: [0xc3] 7605; 7606; X64-LABEL: test_mask_subs_epu8_rrk_128: 7607; X64: # %bb.0: 7608; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7609; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] 7610; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7611; X64-NEXT: retq # encoding: [0xc3] 7612 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 7613 ret <16 x i8> %res 7614} 7615 7616define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 7617; X86-LABEL: test_mask_subs_epu8_rrkz_128: 7618; X86: # %bb.0: 7619; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7620; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1] 7621; X86-NEXT: retl # encoding: [0xc3] 7622; 7623; X64-LABEL: test_mask_subs_epu8_rrkz_128: 7624; X64: # %bb.0: 7625; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7626; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf1,0x7d,0x89,0xd8,0xc1] 7627; X64-NEXT: retq # encoding: [0xc3] 7628 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 7629 ret <16 x i8> %res 7630} 7631 7632define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 7633; X86-LABEL: test_mask_subs_epu8_rm_128: 7634; X86: # %bb.0: 7635; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7636; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00] 7637; X86-NEXT: retl # encoding: [0xc3] 7638; 7639; X64-LABEL: test_mask_subs_epu8_rm_128: 7640; X64: # %bb.0: 7641; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07] 7642; X64-NEXT: retq # encoding: [0xc3] 7643 %b = load <16 x i8>, <16 x i8>* %ptr_b 7644 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 7645 ret <16 x i8> %res 7646} 7647 7648define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 7649; X86-LABEL: test_mask_subs_epu8_rmk_128: 7650; X86: # %bb.0: 7651; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7652; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7653; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08] 7654; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7655; X86-NEXT: retl # encoding: [0xc3] 7656; 7657; X64-LABEL: test_mask_subs_epu8_rmk_128: 7658; X64: # %bb.0: 7659; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7660; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] 7661; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7662; X64-NEXT: retq # encoding: [0xc3] 7663 %b = load <16 x i8>, <16 x i8>* %ptr_b 7664 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 7665 ret <16 x i8> %res 7666} 7667 7668define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 7669; X86-LABEL: test_mask_subs_epu8_rmkz_128: 7670; X86: # %bb.0: 7671; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7672; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7673; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00] 7674; X86-NEXT: retl # encoding: [0xc3] 7675; 7676; X64-LABEL: test_mask_subs_epu8_rmkz_128: 7677; X64: # %bb.0: 7678; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7679; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07] 7680; X64-NEXT: retq # encoding: [0xc3] 7681 %b = load <16 x i8>, <16 x i8>* %ptr_b 7682 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 7683 ret <16 x i8> %res 7684} 7685 7686declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 7687 7688define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 7689; CHECK-LABEL: test_mask_subs_epu8_rr_256: 7690; CHECK: # %bb.0: 7691; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1] 7692; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7693 %res = call <32 
x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 7694 ret <32 x i8> %res 7695} 7696 7697define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 7698; X86-LABEL: test_mask_subs_epu8_rrk_256: 7699; X86: # %bb.0: 7700; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 7701; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] 7702; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7703; X86-NEXT: retl # encoding: [0xc3] 7704; 7705; X64-LABEL: test_mask_subs_epu8_rrk_256: 7706; X64: # %bb.0: 7707; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7708; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] 7709; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7710; X64-NEXT: retq # encoding: [0xc3] 7711 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 7712 ret <32 x i8> %res 7713} 7714 7715define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 7716; X86-LABEL: test_mask_subs_epu8_rrkz_256: 7717; X86: # %bb.0: 7718; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 7719; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1] 7720; X86-NEXT: retl # encoding: [0xc3] 7721; 7722; X64-LABEL: test_mask_subs_epu8_rrkz_256: 7723; X64: # %bb.0: 7724; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7725; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1] 7726; X64-NEXT: retq # encoding: [0xc3] 7727 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 7728 ret <32 x i8> %res 7729} 7730 7731define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 7732; X86-LABEL: test_mask_subs_epu8_rm_256: 7733; X86: # %bb.0: 7734; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7735; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00] 7736; X86-NEXT: retl # encoding: [0xc3] 7737; 7738; X64-LABEL: test_mask_subs_epu8_rm_256: 7739; X64: # %bb.0: 7740; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07] 7741; X64-NEXT: retq # encoding: [0xc3] 7742 %b = load <32 x i8>, <32 x i8>* %ptr_b 7743 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 7744 ret <32 x i8> %res 7745} 7746 7747define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 7748; X86-LABEL: test_mask_subs_epu8_rmk_256: 7749; X86: # %bb.0: 7750; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7751; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 7752; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08] 7753; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7754; X86-NEXT: retl # encoding: [0xc3] 7755; 7756; X64-LABEL: test_mask_subs_epu8_rmk_256: 7757; X64: # %bb.0: 7758; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7759; X64-NEXT: vpsubusb (%rdi), %ymm0, 
%ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f] 7760; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7761; X64-NEXT: retq # encoding: [0xc3] 7762 %b = load <32 x i8>, <32 x i8>* %ptr_b 7763 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 7764 ret <32 x i8> %res 7765} 7766 7767define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 7768; X86-LABEL: test_mask_subs_epu8_rmkz_256: 7769; X86: # %bb.0: 7770; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7771; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 7772; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00] 7773; X86-NEXT: retl # encoding: [0xc3] 7774; 7775; X64-LABEL: test_mask_subs_epu8_rmkz_256: 7776; X64: # %bb.0: 7777; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7778; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07] 7779; X64-NEXT: retq # encoding: [0xc3] 7780 %b = load <32 x i8>, <32 x i8>* %ptr_b 7781 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 7782 ret <32 x i8> %res 7783} 7784 7785declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 7786 7787define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 7788; CHECK-LABEL: test_mask_adds_epi16_rr_128: 7789; CHECK: # %bb.0: 7790; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] 7791; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7792 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7793 ret <8 x i16> %res 7794} 7795 7796define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 7797; X86-LABEL: test_mask_adds_epi16_rrk_128: 7798; X86: # %bb.0: 7799; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7800; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7801; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 7802; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7803; X86-NEXT: retl # encoding: [0xc3] 7804; 7805; X64-LABEL: test_mask_adds_epi16_rrk_128: 7806; X64: # %bb.0: 7807; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7808; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 7809; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7810; X64-NEXT: retq # encoding: [0xc3] 7811 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7812 ret <8 x i16> %res 7813} 7814 7815define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 7816; X86-LABEL: test_mask_adds_epi16_rrkz_128: 7817; X86: # %bb.0: 7818; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7819; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7820; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 7821; X86-NEXT: retl # encoding: [0xc3] 7822; 7823; X64-LABEL: test_mask_adds_epi16_rrkz_128: 7824; X64: # %bb.0: 7825; X64-NEXT: kmovd %edi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xcf] 7826; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 7827; X64-NEXT: retq # encoding: [0xc3] 7828 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7829 ret <8 x i16> %res 7830} 7831 7832define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 7833; X86-LABEL: test_mask_adds_epi16_rm_128: 7834; X86: # %bb.0: 7835; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7836; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00] 7837; X86-NEXT: retl # encoding: [0xc3] 7838; 7839; X64-LABEL: test_mask_adds_epi16_rm_128: 7840; X64: # %bb.0: 7841; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07] 7842; X64-NEXT: retq # encoding: [0xc3] 7843 %b = load <8 x i16>, <8 x i16>* %ptr_b 7844 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 7845 ret <8 x i16> %res 7846} 7847 7848define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 7849; X86-LABEL: test_mask_adds_epi16_rmk_128: 7850; X86: # %bb.0: 7851; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7852; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7853; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7854; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08] 7855; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7856; X86-NEXT: retl # encoding: [0xc3] 7857; 7858; X64-LABEL: test_mask_adds_epi16_rmk_128: 7859; X64: # %bb.0: 7860; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7861; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] 7862; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 7863; X64-NEXT: retq # encoding: [0xc3] 7864 %b = load <8 x i16>, <8 x i16>* %ptr_b 7865 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 7866 ret <8 x i16> %res 7867} 7868 7869define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 7870; X86-LABEL: test_mask_adds_epi16_rmkz_128: 7871; X86: # %bb.0: 7872; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7873; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 7874; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 7875; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00] 7876; X86-NEXT: retl # encoding: [0xc3] 7877; 7878; X64-LABEL: test_mask_adds_epi16_rmkz_128: 7879; X64: # %bb.0: 7880; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7881; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] 7882; X64-NEXT: retq # encoding: [0xc3] 7883 %b = load <8 x i16>, <8 x i16>* %ptr_b 7884 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 7885 ret <8 x i16> %res 7886} 7887 7888declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 7889 7890define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 7891; CHECK-LABEL: 
test_mask_adds_epi16_rr_256: 7892; CHECK: # %bb.0: 7893; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1] 7894; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 7895 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7896 ret <16 x i16> %res 7897} 7898 7899define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 7900; X86-LABEL: test_mask_adds_epi16_rrk_256: 7901; X86: # %bb.0: 7902; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7903; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 7904; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7905; X86-NEXT: retl # encoding: [0xc3] 7906; 7907; X64-LABEL: test_mask_adds_epi16_rrk_256: 7908; X64: # %bb.0: 7909; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7910; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 7911; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 7912; X64-NEXT: retq # encoding: [0xc3] 7913 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7914 ret <16 x i16> %res 7915} 7916 7917define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 7918; X86-LABEL: test_mask_adds_epi16_rrkz_256: 7919; X86: # %bb.0: 7920; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 7921; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 7922; X86-NEXT: retl # encoding: [0xc3] 7923; 7924; X64-LABEL: test_mask_adds_epi16_rrkz_256: 7925; X64: # %bb.0: 7926; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 7927; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 7928; X64-NEXT: retq # encoding: [0xc3] 7929 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7930 ret <16 x i16> %res 7931} 7932 7933define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 7934; X86-LABEL: test_mask_adds_epi16_rm_256: 7935; X86: # %bb.0: 7936; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7937; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00] 7938; X86-NEXT: retl # encoding: [0xc3] 7939; 7940; X64-LABEL: test_mask_adds_epi16_rm_256: 7941; X64: # %bb.0: 7942; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07] 7943; X64-NEXT: retq # encoding: [0xc3] 7944 %b = load <16 x i16>, <16 x i16>* %ptr_b 7945 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 7946 ret <16 x i16> %res 7947} 7948 7949define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 7950; X86-LABEL: test_mask_adds_epi16_rmk_256: 7951; X86: # %bb.0: 7952; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7953; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7954; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08] 7955; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xfd,0x6f,0xc1] 7956; X86-NEXT: retl # encoding: [0xc3] 7957; 7958; X64-LABEL: test_mask_adds_epi16_rmk_256: 7959; X64: # %bb.0: 7960; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7961; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] 7962; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 7963; X64-NEXT: retq # encoding: [0xc3] 7964 %b = load <16 x i16>, <16 x i16>* %ptr_b 7965 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 7966 ret <16 x i16> %res 7967} 7968 7969define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 7970; X86-LABEL: test_mask_adds_epi16_rmkz_256: 7971; X86: # %bb.0: 7972; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 7973; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 7974; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00] 7975; X86-NEXT: retl # encoding: [0xc3] 7976; 7977; X64-LABEL: test_mask_adds_epi16_rmkz_256: 7978; X64: # %bb.0: 7979; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 7980; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] 7981; X64-NEXT: retq # encoding: [0xc3] 7982 %b = load <16 x i16>, <16 x i16>* %ptr_b 7983 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 7984 ret <16 x i16> %res 7985} 7986 7987declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 7988 7989declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0 7990 7991define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 7992; X86-LABEL: test_test_subs_epi16_rrk_128: 7993; X86: # %bb.0: 7994; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 7995; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 7996; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 7997; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 7998; X86-NEXT: retl # encoding: [0xc3] 7999; 8000; X64-LABEL: test_test_subs_epi16_rrk_128: 8001; X64: # %bb.0: 8002; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8003; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 8004; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8005; X64-NEXT: retq # encoding: [0xc3] 8006 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) 8007 %2 = bitcast i8 %mask to <8 x i1> 8008 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 8009 ret <8 x i16> %3 8010} 8011 8012define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 8013; X86-LABEL: test_test_subs_epi16_rrkz_128: 8014; X86: # %bb.0: 8015; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8016; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8017; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 8018; X86-NEXT: retl # encoding: [0xc3] 8019; 8020; X64-LABEL: test_test_subs_epi16_rrkz_128: 8021; X64: # %bb.0: 8022; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8023; X64-NEXT: vpsubsw %xmm1, 
%xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 8024; X64-NEXT: retq # encoding: [0xc3] 8025 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) 8026 %2 = bitcast i8 %mask to <8 x i1> 8027 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 8028 ret <8 x i16> %3 8029} 8030 8031define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 8032; X86-LABEL: test_test_subs_epi16_rmk_128: 8033; X86: # %bb.0: 8034; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8035; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8036; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8037; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08] 8038; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8039; X86-NEXT: retl # encoding: [0xc3] 8040; 8041; X64-LABEL: test_test_subs_epi16_rmk_128: 8042; X64: # %bb.0: 8043; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8044; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] 8045; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8046; X64-NEXT: retq # encoding: [0xc3] 8047 %b = load <8 x i16>, <8 x i16>* %ptr_b 8048 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) 8049 %2 = bitcast i8 %mask to <8 x i1> 8050 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 8051 ret <8 x i16> %3 8052} 8053 8054define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 8055; X86-LABEL: test_test_subs_epi16_rmkz_128: 8056; X86: # %bb.0: 8057; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8058; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8059; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8060; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00] 8061; X86-NEXT: retl # encoding: [0xc3] 8062; 8063; X64-LABEL: test_test_subs_epi16_rmkz_128: 8064; X64: # %bb.0: 8065; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8066; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] 8067; X64-NEXT: retq # encoding: [0xc3] 8068 %b = load <8 x i16>, <8 x i16>* %ptr_b 8069 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) 8070 %2 = bitcast i8 %mask to <8 x i1> 8071 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 8072 ret <8 x i16> %3 8073} 8074 8075declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0 8076 8077define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 8078; X86-LABEL: test_test_subs_epi16_rrk_256: 8079; X86: # %bb.0: 8080; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8081; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 8082; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8083; X86-NEXT: retl # encoding: [0xc3] 8084; 8085; X64-LABEL: test_test_subs_epi16_rrk_256: 8086; X64: # %bb.0: 8087; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8088; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 8089; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc2] 8090; X64-NEXT: retq # encoding: [0xc3] 8091 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) 8092 %2 = bitcast i16 %mask to <16 x i1> 8093 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 8094 ret <16 x i16> %3 8095} 8096 8097define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 8098; X86-LABEL: test_test_subs_epi16_rrkz_256: 8099; X86: # %bb.0: 8100; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8101; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 8102; X86-NEXT: retl # encoding: [0xc3] 8103; 8104; X64-LABEL: test_test_subs_epi16_rrkz_256: 8105; X64: # %bb.0: 8106; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8107; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 8108; X64-NEXT: retq # encoding: [0xc3] 8109 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) 8110 %2 = bitcast i16 %mask to <16 x i1> 8111 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 8112 ret <16 x i16> %3 8113} 8114 8115define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 8116; X86-LABEL: test_test_subs_epi16_rmk_256: 8117; X86: # %bb.0: 8118; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8119; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8120; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08] 8121; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8122; X86-NEXT: retl # encoding: [0xc3] 8123; 8124; X64-LABEL: test_test_subs_epi16_rmk_256: 8125; X64: # %bb.0: 8126; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8127; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] 8128; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8129; X64-NEXT: retq # encoding: [0xc3] 8130 %b = load <16 x i16>, <16 x i16>* %ptr_b 8131 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) 8132 %2 = bitcast i16 %mask to <16 x i1> 8133 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 8134 ret <16 x i16> %3 8135} 8136 8137define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 8138; X86-LABEL: test_test_subs_epi16_rmkz_256: 8139; X86: # %bb.0: 8140; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8141; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8142; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00] 8143; X86-NEXT: retl # encoding: [0xc3] 8144; 8145; X64-LABEL: test_test_subs_epi16_rmkz_256: 8146; X64: # %bb.0: 8147; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8148; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] 8149; X64-NEXT: retq # encoding: [0xc3] 8150 %b = load <16 x i16>, <16 x i16>* %ptr_b 8151 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) 8152 %2 = bitcast i16 %mask to <16 x i1> 8153 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 8154 ret <16 x i16> %3 8155} 8156 8157declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0 8158 8159define <16 x i8> @test_test_subs_epi8_rrk_128(<16 
x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 8160; X86-LABEL: test_test_subs_epi8_rrk_128: 8161; X86: # %bb.0: 8162; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8163; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 8164; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8165; X86-NEXT: retl # encoding: [0xc3] 8166; 8167; X64-LABEL: test_test_subs_epi8_rrk_128: 8168; X64: # %bb.0: 8169; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8170; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 8171; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8172; X64-NEXT: retq # encoding: [0xc3] 8173 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) 8174 %2 = bitcast i16 %mask to <16 x i1> 8175 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 8176 ret <16 x i8> %3 8177} 8178 8179define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 8180; X86-LABEL: test_test_subs_epi8_rrkz_128: 8181; X86: # %bb.0: 8182; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8183; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 8184; X86-NEXT: retl # encoding: [0xc3] 8185; 8186; X64-LABEL: test_test_subs_epi8_rrkz_128: 8187; X64: # %bb.0: 8188; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8189; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 8190; X64-NEXT: retq # encoding: [0xc3] 8191 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) 8192 %2 = bitcast i16 %mask to <16 x i1> 8193 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 8194 ret <16 x i8> %3 8195} 8196 8197define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 8198; X86-LABEL: test_test_subs_epi8_rmk_128: 8199; X86: # %bb.0: 8200; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8201; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8202; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08] 8203; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8204; X86-NEXT: retl # encoding: [0xc3] 8205; 8206; X64-LABEL: test_test_subs_epi8_rmk_128: 8207; X64: # %bb.0: 8208; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8209; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] 8210; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8211; X64-NEXT: retq # encoding: [0xc3] 8212 %b = load <16 x i8>, <16 x i8>* %ptr_b 8213 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) 8214 %2 = bitcast i16 %mask to <16 x i1> 8215 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 8216 ret <16 x i8> %3 8217} 8218 8219define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 8220; X86-LABEL: test_test_subs_epi8_rmkz_128: 8221; X86: # %bb.0: 8222; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8223; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8224; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00] 8225; 
X86-NEXT: retl # encoding: [0xc3] 8226; 8227; X64-LABEL: test_test_subs_epi8_rmkz_128: 8228; X64: # %bb.0: 8229; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8230; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] 8231; X64-NEXT: retq # encoding: [0xc3] 8232 %b = load <16 x i8>, <16 x i8>* %ptr_b 8233 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) 8234 %2 = bitcast i16 %mask to <16 x i1> 8235 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 8236 ret <16 x i8> %3 8237} 8238 8239declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0 8240 8241define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 8242; X86-LABEL: test_test_subs_epi8_rrk_256: 8243; X86: # %bb.0: 8244; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 8245; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 8246; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8247; X86-NEXT: retl # encoding: [0xc3] 8248; 8249; X64-LABEL: test_test_subs_epi8_rrk_256: 8250; X64: # %bb.0: 8251; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8252; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 8253; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8254; X64-NEXT: retq # encoding: [0xc3] 8255 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8256 %2 = bitcast i32 %mask to <32 x i1> 8257 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 8258 ret <32 x i8> %3 8259} 8260 8261define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 8262; X86-LABEL: test_test_subs_epi8_rrkz_256: 8263; X86: # %bb.0: 8264; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 8265; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 8266; X86-NEXT: retl # encoding: [0xc3] 8267; 8268; X64-LABEL: test_test_subs_epi8_rrkz_256: 8269; X64: # %bb.0: 8270; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8271; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 8272; X64-NEXT: retq # encoding: [0xc3] 8273 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8274 %2 = bitcast i32 %mask to <32 x i1> 8275 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8276 ret <32 x i8> %3 8277} 8278 8279define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 8280; X86-LABEL: test_test_subs_epi8_rmk_256: 8281; X86: # %bb.0: 8282; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8283; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8284; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08] 8285; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8286; X86-NEXT: retl # encoding: [0xc3] 8287; 8288; X64-LABEL: test_test_subs_epi8_rmk_256: 8289; X64: # %bb.0: 8290; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8291; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] 8292; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8293; X64-NEXT: retq 
# encoding: [0xc3] 8294 %b = load <32 x i8>, <32 x i8>* %ptr_b 8295 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8296 %2 = bitcast i32 %mask to <32 x i1> 8297 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 8298 ret <32 x i8> %3 8299} 8300 8301define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 8302; X86-LABEL: test_test_subs_epi8_rmkz_256: 8303; X86: # %bb.0: 8304; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8305; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8306; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00] 8307; X86-NEXT: retl # encoding: [0xc3] 8308; 8309; X64-LABEL: test_test_subs_epi8_rmkz_256: 8310; X64: # %bb.0: 8311; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8312; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] 8313; X64-NEXT: retq # encoding: [0xc3] 8314 %b = load <32 x i8>, <32 x i8>* %ptr_b 8315 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) 8316 %2 = bitcast i32 %mask to <32 x i1> 8317 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8318 ret <32 x i8> %3 8319} 8320 8321define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 8322; CHECK-LABEL: test_mask_subs_epi16_rr_128: 8323; CHECK: # %bb.0: 8324; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] 8325; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8326 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 8327 ret <8 x i16> %res 8328} 8329 8330define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 8331; X86-LABEL: test_mask_subs_epi16_rrk_128: 8332; X86: # %bb.0: 8333; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8334; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8335; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 8336; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8337; X86-NEXT: retl # encoding: [0xc3] 8338; 8339; X64-LABEL: test_mask_subs_epi16_rrk_128: 8340; X64: # %bb.0: 8341; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8342; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 8343; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8344; X64-NEXT: retq # encoding: [0xc3] 8345 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 8346 ret <8 x i16> %res 8347} 8348 8349define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 8350; X86-LABEL: test_mask_subs_epi16_rrkz_128: 8351; X86: # %bb.0: 8352; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8353; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8354; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 8355; X86-NEXT: retl # encoding: [0xc3] 8356; 8357; X64-LABEL: test_mask_subs_epi16_rrkz_128: 8358; X64: # %bb.0: 8359; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8360; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 8361; X64-NEXT: retq # 
encoding: [0xc3] 8362 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 8363 ret <8 x i16> %res 8364} 8365 8366define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 8367; X86-LABEL: test_mask_subs_epi16_rm_128: 8368; X86: # %bb.0: 8369; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8370; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00] 8371; X86-NEXT: retl # encoding: [0xc3] 8372; 8373; X64-LABEL: test_mask_subs_epi16_rm_128: 8374; X64: # %bb.0: 8375; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07] 8376; X64-NEXT: retq # encoding: [0xc3] 8377 %b = load <8 x i16>, <8 x i16>* %ptr_b 8378 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 8379 ret <8 x i16> %res 8380} 8381 8382define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 8383; X86-LABEL: test_mask_subs_epi16_rmk_128: 8384; X86: # %bb.0: 8385; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8386; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8387; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8388; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08] 8389; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8390; X86-NEXT: retl # encoding: [0xc3] 8391; 8392; X64-LABEL: test_mask_subs_epi16_rmk_128: 8393; X64: # %bb.0: 8394; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8395; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] 8396; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8397; X64-NEXT: retq # encoding: [0xc3] 8398 %b = load <8 x i16>, <8 x i16>* %ptr_b 8399 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 8400 ret <8 x i16> %res 8401} 8402 8403define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 8404; X86-LABEL: test_mask_subs_epi16_rmkz_128: 8405; X86: # %bb.0: 8406; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8407; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8408; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8409; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00] 8410; X86-NEXT: retl # encoding: [0xc3] 8411; 8412; X64-LABEL: test_mask_subs_epi16_rmkz_128: 8413; X64: # %bb.0: 8414; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8415; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] 8416; X64-NEXT: retq # encoding: [0xc3] 8417 %b = load <8 x i16>, <8 x i16>* %ptr_b 8418 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 8419 ret <8 x i16> %res 8420} 8421 8422declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 8423 8424define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 8425; CHECK-LABEL: test_mask_subs_epi16_rr_256: 8426; CHECK: # %bb.0: 8427; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1] 
8428; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8429 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 8430 ret <16 x i16> %res 8431} 8432 8433define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 8434; X86-LABEL: test_mask_subs_epi16_rrk_256: 8435; X86: # %bb.0: 8436; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8437; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 8438; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8439; X86-NEXT: retl # encoding: [0xc3] 8440; 8441; X64-LABEL: test_mask_subs_epi16_rrk_256: 8442; X64: # %bb.0: 8443; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8444; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 8445; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8446; X64-NEXT: retq # encoding: [0xc3] 8447 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 8448 ret <16 x i16> %res 8449} 8450 8451define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 8452; X86-LABEL: test_mask_subs_epi16_rrkz_256: 8453; X86: # %bb.0: 8454; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8455; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 8456; X86-NEXT: retl # encoding: [0xc3] 8457; 8458; X64-LABEL: test_mask_subs_epi16_rrkz_256: 8459; X64: # %bb.0: 8460; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8461; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 8462; X64-NEXT: retq # encoding: [0xc3] 8463 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 8464 ret <16 x i16> %res 8465} 8466 8467define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 8468; X86-LABEL: test_mask_subs_epi16_rm_256: 8469; X86: # %bb.0: 8470; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8471; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00] 8472; X86-NEXT: retl # encoding: [0xc3] 8473; 8474; X64-LABEL: test_mask_subs_epi16_rm_256: 8475; X64: # %bb.0: 8476; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07] 8477; X64-NEXT: retq # encoding: [0xc3] 8478 %b = load <16 x i16>, <16 x i16>* %ptr_b 8479 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 8480 ret <16 x i16> %res 8481} 8482 8483define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 8484; X86-LABEL: test_mask_subs_epi16_rmk_256: 8485; X86: # %bb.0: 8486; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8487; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8488; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08] 8489; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8490; X86-NEXT: retl # encoding: [0xc3] 8491; 8492; X64-LABEL: test_mask_subs_epi16_rmk_256: 8493; X64: # %bb.0: 8494; X64-NEXT: 
kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8495; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] 8496; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8497; X64-NEXT: retq # encoding: [0xc3] 8498 %b = load <16 x i16>, <16 x i16>* %ptr_b 8499 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 8500 ret <16 x i16> %res 8501} 8502 8503define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 8504; X86-LABEL: test_mask_subs_epi16_rmkz_256: 8505; X86: # %bb.0: 8506; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8507; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8508; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00] 8509; X86-NEXT: retl # encoding: [0xc3] 8510; 8511; X64-LABEL: test_mask_subs_epi16_rmkz_256: 8512; X64: # %bb.0: 8513; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8514; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] 8515; X64-NEXT: retq # encoding: [0xc3] 8516 %b = load <16 x i16>, <16 x i16>* %ptr_b 8517 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 8518 ret <16 x i16> %res 8519} 8520 8521declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 8522 8523declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0 8524 8525define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 8526; X86-LABEL: test_adds_epi16_rrk_128: 8527; X86: # %bb.0: 8528; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8529; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8530; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 8531; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8532; X86-NEXT: retl # encoding: [0xc3] 8533; 8534; X64-LABEL: test_adds_epi16_rrk_128: 8535; X64: # %bb.0: 8536; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8537; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 8538; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8539; X64-NEXT: retq # encoding: [0xc3] 8540 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) 8541 %2 = bitcast i8 %mask to <8 x i1> 8542 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 8543 ret <8 x i16> %3 8544} 8545 8546define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 8547; X86-LABEL: test_adds_epi16_rrkz_128: 8548; X86: # %bb.0: 8549; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8550; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 8551; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 8552; X86-NEXT: retl # encoding: [0xc3] 8553; 8554; X64-LABEL: test_adds_epi16_rrkz_128: 8555; X64: # %bb.0: 8556; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8557; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 8558; X64-NEXT: retq # encoding: [0xc3] 8559 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) 8560 %2 = 
bitcast i8 %mask to <8 x i1> 8561 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 8562 ret <8 x i16> %3 8563} 8564 8565define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 8566; X86-LABEL: test_adds_epi16_rmk_128: 8567; X86: # %bb.0: 8568; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8569; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8570; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8571; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08] 8572; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8573; X86-NEXT: retl # encoding: [0xc3] 8574; 8575; X64-LABEL: test_adds_epi16_rmk_128: 8576; X64: # %bb.0: 8577; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8578; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] 8579; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8580; X64-NEXT: retq # encoding: [0xc3] 8581 %b = load <8 x i16>, <8 x i16>* %ptr_b 8582 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) 8583 %2 = bitcast i8 %mask to <8 x i1> 8584 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 8585 ret <8 x i16> %3 8586} 8587 8588define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 8589; X86-LABEL: test_adds_epi16_rmkz_128: 8590; X86: # %bb.0: 8591; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8592; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 8593; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 8594; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00] 8595; X86-NEXT: retl # encoding: [0xc3] 8596; 8597; X64-LABEL: test_adds_epi16_rmkz_128: 8598; X64: # %bb.0: 8599; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8600; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] 8601; X64-NEXT: retq # encoding: [0xc3] 8602 %b = load <8 x i16>, <8 x i16>* %ptr_b 8603 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) 8604 %2 = bitcast i8 %mask to <8 x i1> 8605 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 8606 ret <8 x i16> %3 8607} 8608 8609declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0 8610 8611define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 8612; X86-LABEL: test_adds_epi16_rrk_256: 8613; X86: # %bb.0: 8614; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8615; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 8616; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8617; X86-NEXT: retl # encoding: [0xc3] 8618; 8619; X64-LABEL: test_adds_epi16_rrk_256: 8620; X64: # %bb.0: 8621; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8622; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 8623; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8624; X64-NEXT: retq # encoding: [0xc3] 8625 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) 8626 %2 = bitcast i16 %mask to <16 x i1> 8627 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> 
%passThru 8628 ret <16 x i16> %3 8629} 8630 8631define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 8632; X86-LABEL: test_adds_epi16_rrkz_256: 8633; X86: # %bb.0: 8634; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8635; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 8636; X86-NEXT: retl # encoding: [0xc3] 8637; 8638; X64-LABEL: test_adds_epi16_rrkz_256: 8639; X64: # %bb.0: 8640; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8641; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 8642; X64-NEXT: retq # encoding: [0xc3] 8643 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) 8644 %2 = bitcast i16 %mask to <16 x i1> 8645 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 8646 ret <16 x i16> %3 8647} 8648 8649define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 8650; X86-LABEL: test_adds_epi16_rmk_256: 8651; X86: # %bb.0: 8652; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8653; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8654; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08] 8655; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8656; X86-NEXT: retl # encoding: [0xc3] 8657; 8658; X64-LABEL: test_adds_epi16_rmk_256: 8659; X64: # %bb.0: 8660; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8661; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] 8662; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8663; X64-NEXT: retq # encoding: [0xc3] 8664 %b = load <16 x i16>, <16 x i16>* %ptr_b 8665 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) 8666 %2 = bitcast i16 %mask to <16 x i1> 8667 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 8668 ret <16 x i16> %3 8669} 8670 8671define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 8672; X86-LABEL: test_adds_epi16_rmkz_256: 8673; X86: # %bb.0: 8674; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8675; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8676; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00] 8677; X86-NEXT: retl # encoding: [0xc3] 8678; 8679; X64-LABEL: test_adds_epi16_rmkz_256: 8680; X64: # %bb.0: 8681; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8682; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] 8683; X64-NEXT: retq # encoding: [0xc3] 8684 %b = load <16 x i16>, <16 x i16>* %ptr_b 8685 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) 8686 %2 = bitcast i16 %mask to <16 x i1> 8687 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 8688 ret <16 x i16> %3 8689} 8690 8691declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0 8692 8693define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 8694; X86-LABEL: test_adds_epi8_rrk_128: 8695; X86: # %bb.0: 8696; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8697; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: 
[0x62,0xf1,0x7d,0x09,0xec,0xd1] 8698; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8699; X86-NEXT: retl # encoding: [0xc3] 8700; 8701; X64-LABEL: test_adds_epi8_rrk_128: 8702; X64: # %bb.0: 8703; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8704; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 8705; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8706; X64-NEXT: retq # encoding: [0xc3] 8707 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) 8708 %2 = bitcast i16 %mask to <16 x i1> 8709 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 8710 ret <16 x i8> %3 8711} 8712 8713define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 8714; X86-LABEL: test_adds_epi8_rrkz_128: 8715; X86: # %bb.0: 8716; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8717; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 8718; X86-NEXT: retl # encoding: [0xc3] 8719; 8720; X64-LABEL: test_adds_epi8_rrkz_128: 8721; X64: # %bb.0: 8722; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8723; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 8724; X64-NEXT: retq # encoding: [0xc3] 8725 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) 8726 %2 = bitcast i16 %mask to <16 x i1> 8727 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 8728 ret <16 x i8> %3 8729} 8730 8731define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 8732; X86-LABEL: test_adds_epi8_rm_128: 8733; X86: # %bb.0: 8734; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8735; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00] 8736; X86-NEXT: retl # encoding: [0xc3] 8737; 8738; X64-LABEL: test_adds_epi8_rm_128: 8739; X64: # %bb.0: 8740; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] 8741; X64-NEXT: retq # encoding: [0xc3] 8742 %b = load <16 x i8>, <16 x i8>* %ptr_b 8743 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) 8744 ret <16 x i8> %1 8745} 8746 8747define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 8748; X86-LABEL: test_adds_epi8_rmk_128: 8749; X86: # %bb.0: 8750; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8751; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8752; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08] 8753; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8754; X86-NEXT: retl # encoding: [0xc3] 8755; 8756; X64-LABEL: test_adds_epi8_rmk_128: 8757; X64: # %bb.0: 8758; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8759; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] 8760; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8761; X64-NEXT: retq # encoding: [0xc3] 8762 %b = load <16 x i8>, <16 x i8>* %ptr_b 8763 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) 8764 %2 = bitcast i16 %mask to <16 x i1> 8765 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 8766 ret <16 x i8> %3 8767} 8768 8769define <16 x i8> 
@test_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 8770; X86-LABEL: test_adds_epi8_rmkz_128: 8771; X86: # %bb.0: 8772; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8773; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8774; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00] 8775; X86-NEXT: retl # encoding: [0xc3] 8776; 8777; X64-LABEL: test_adds_epi8_rmkz_128: 8778; X64: # %bb.0: 8779; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8780; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] 8781; X64-NEXT: retq # encoding: [0xc3] 8782 %b = load <16 x i8>, <16 x i8>* %ptr_b 8783 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) 8784 %2 = bitcast i16 %mask to <16 x i1> 8785 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 8786 ret <16 x i8> %3 8787} 8788 8789declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0 8790 8791define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 8792; X86-LABEL: test_adds_epi8_rrk_256: 8793; X86: # %bb.0: 8794; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 8795; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 8796; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8797; X86-NEXT: retl # encoding: [0xc3] 8798; 8799; X64-LABEL: test_adds_epi8_rrk_256: 8800; X64: # %bb.0: 8801; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8802; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 8803; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8804; X64-NEXT: retq # encoding: [0xc3] 8805 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) 8806 %2 = bitcast i32 %mask to <32 x i1> 8807 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 8808 ret <32 x i8> %3 8809} 8810 8811define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 8812; X86-LABEL: test_adds_epi8_rrkz_256: 8813; X86: # %bb.0: 8814; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 8815; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 8816; X86-NEXT: retl # encoding: [0xc3] 8817; 8818; X64-LABEL: test_adds_epi8_rrkz_256: 8819; X64: # %bb.0: 8820; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8821; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 8822; X64-NEXT: retq # encoding: [0xc3] 8823 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) 8824 %2 = bitcast i32 %mask to <32 x i1> 8825 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8826 ret <32 x i8> %3 8827} 8828 8829define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 8830; X86-LABEL: test_adds_epi8_rmk_256: 8831; X86: # %bb.0: 8832; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8833; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8834; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08] 8835; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8836; X86-NEXT: retl # 
encoding: [0xc3] 8837; 8838; X64-LABEL: test_adds_epi8_rmk_256: 8839; X64: # %bb.0: 8840; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8841; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] 8842; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 8843; X64-NEXT: retq # encoding: [0xc3] 8844 %b = load <32 x i8>, <32 x i8>* %ptr_b 8845 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) 8846 %2 = bitcast i32 %mask to <32 x i1> 8847 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 8848 ret <32 x i8> %3 8849} 8850 8851define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 8852; X86-LABEL: test_adds_epi8_rmkz_256: 8853; X86: # %bb.0: 8854; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8855; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 8856; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00] 8857; X86-NEXT: retl # encoding: [0xc3] 8858; 8859; X64-LABEL: test_adds_epi8_rmkz_256: 8860; X64: # %bb.0: 8861; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8862; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] 8863; X64-NEXT: retq # encoding: [0xc3] 8864 %b = load <32 x i8>, <32 x i8>* %ptr_b 8865 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) 8866 %2 = bitcast i32 %mask to <32 x i1> 8867 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 8868 ret <32 x i8> %3 8869} 8870 8871define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 8872; CHECK-LABEL: test_mask_adds_epi8_rr_128: 8873; CHECK: # %bb.0: 8874; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] 8875; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8876 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 8877 ret <16 x i8> %res 8878} 8879 8880define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 8881; X86-LABEL: test_mask_adds_epi8_rrk_128: 8882; X86: # %bb.0: 8883; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8884; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 8885; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8886; X86-NEXT: retl # encoding: [0xc3] 8887; 8888; X64-LABEL: test_mask_adds_epi8_rrk_128: 8889; X64: # %bb.0: 8890; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8891; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 8892; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 8893; X64-NEXT: retq # encoding: [0xc3] 8894 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 8895 ret <16 x i8> %res 8896} 8897 8898define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 8899; X86-LABEL: test_mask_adds_epi8_rrkz_128: 8900; X86: # %bb.0: 8901; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 8902; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 8903; X86-NEXT: retl # encoding: [0xc3] 8904; 8905; X64-LABEL: 
test_mask_adds_epi8_rrkz_128: 8906; X64: # %bb.0: 8907; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8908; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 8909; X64-NEXT: retq # encoding: [0xc3] 8910 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 8911 ret <16 x i8> %res 8912} 8913 8914define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 8915; X86-LABEL: test_mask_adds_epi8_rm_128: 8916; X86: # %bb.0: 8917; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8918; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00] 8919; X86-NEXT: retl # encoding: [0xc3] 8920; 8921; X64-LABEL: test_mask_adds_epi8_rm_128: 8922; X64: # %bb.0: 8923; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] 8924; X64-NEXT: retq # encoding: [0xc3] 8925 %b = load <16 x i8>, <16 x i8>* %ptr_b 8926 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 8927 ret <16 x i8> %res 8928} 8929 8930define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 8931; X86-LABEL: test_mask_adds_epi8_rmk_128: 8932; X86: # %bb.0: 8933; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8934; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8935; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08] 8936; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8937; X86-NEXT: retl # encoding: [0xc3] 8938; 8939; X64-LABEL: test_mask_adds_epi8_rmk_128: 8940; X64: # %bb.0: 8941; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8942; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] 8943; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 8944; X64-NEXT: retq # encoding: [0xc3] 8945 %b = load <16 x i8>, <16 x i8>* %ptr_b 8946 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 8947 ret <16 x i8> %res 8948} 8949 8950define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 8951; X86-LABEL: test_mask_adds_epi8_rmkz_128: 8952; X86: # %bb.0: 8953; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 8954; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 8955; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00] 8956; X86-NEXT: retl # encoding: [0xc3] 8957; 8958; X64-LABEL: test_mask_adds_epi8_rmkz_128: 8959; X64: # %bb.0: 8960; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 8961; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] 8962; X64-NEXT: retq # encoding: [0xc3] 8963 %b = load <16 x i8>, <16 x i8>* %ptr_b 8964 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 8965 ret <16 x i8> %res 8966} 8967 8968declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 8969 8970define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 8971; CHECK-LABEL: test_mask_adds_epi8_rr_256: 8972; CHECK: # %bb.0: 
8973; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1] 8974; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8975 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 8976 ret <32 x i8> %res 8977} 8978 8979define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 8980; X86-LABEL: test_mask_adds_epi8_rrk_256: 8981; X86: # %bb.0: 8982; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 8983; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 8984; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8985; X86-NEXT: retl # encoding: [0xc3] 8986; 8987; X64-LABEL: test_mask_adds_epi8_rrk_256: 8988; X64: # %bb.0: 8989; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 8990; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 8991; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 8992; X64-NEXT: retq # encoding: [0xc3] 8993 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 8994 ret <32 x i8> %res 8995} 8996 8997define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 8998; X86-LABEL: test_mask_adds_epi8_rrkz_256: 8999; X86: # %bb.0: 9000; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 9001; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 9002; X86-NEXT: retl # encoding: [0xc3] 9003; 9004; X64-LABEL: test_mask_adds_epi8_rrkz_256: 9005; X64: # %bb.0: 9006; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9007; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 9008; X64-NEXT: retq # encoding: [0xc3] 9009 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 9010 ret <32 x i8> %res 9011} 9012 9013define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 9014; X86-LABEL: test_mask_adds_epi8_rm_256: 9015; X86: # %bb.0: 9016; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9017; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00] 9018; X86-NEXT: retl # encoding: [0xc3] 9019; 9020; X64-LABEL: test_mask_adds_epi8_rm_256: 9021; X64: # %bb.0: 9022; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07] 9023; X64-NEXT: retq # encoding: [0xc3] 9024 %b = load <32 x i8>, <32 x i8>* %ptr_b 9025 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 9026 ret <32 x i8> %res 9027} 9028 9029define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 9030; X86-LABEL: test_mask_adds_epi8_rmk_256: 9031; X86: # %bb.0: 9032; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9033; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 9034; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08] 9035; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9036; X86-NEXT: retl # encoding: [0xc3] 9037; 9038; 
X64-LABEL: test_mask_adds_epi8_rmk_256: 9039; X64: # %bb.0: 9040; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9041; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] 9042; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9043; X64-NEXT: retq # encoding: [0xc3] 9044 %b = load <32 x i8>, <32 x i8>* %ptr_b 9045 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 9046 ret <32 x i8> %res 9047} 9048 9049define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 9050; X86-LABEL: test_mask_adds_epi8_rmkz_256: 9051; X86: # %bb.0: 9052; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9053; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 9054; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00] 9055; X86-NEXT: retl # encoding: [0xc3] 9056; 9057; X64-LABEL: test_mask_adds_epi8_rmkz_256: 9058; X64: # %bb.0: 9059; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9060; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] 9061; X64-NEXT: retq # encoding: [0xc3] 9062 %b = load <32 x i8>, <32 x i8>* %ptr_b 9063 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 9064 ret <32 x i8> %res 9065} 9066 9067declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 9068 9069define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 9070; CHECK-LABEL: test_mask_subs_epi8_rr_128: 9071; CHECK: # %bb.0: 9072; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] 9073; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9074 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 9075 ret <16 x i8> %res 9076} 9077 9078define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 9079; X86-LABEL: test_mask_subs_epi8_rrk_128: 9080; X86: # %bb.0: 9081; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9082; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 9083; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9084; X86-NEXT: retl # encoding: [0xc3] 9085; 9086; X64-LABEL: test_mask_subs_epi8_rrk_128: 9087; X64: # %bb.0: 9088; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9089; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 9090; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9091; X64-NEXT: retq # encoding: [0xc3] 9092 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 9093 ret <16 x i8> %res 9094} 9095 9096define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 9097; X86-LABEL: test_mask_subs_epi8_rrkz_128: 9098; X86: # %bb.0: 9099; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9100; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 9101; X86-NEXT: retl # encoding: [0xc3] 9102; 9103; X64-LABEL: test_mask_subs_epi8_rrkz_128: 9104; X64: # %bb.0: 
9105; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9106; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 9107; X64-NEXT: retq # encoding: [0xc3] 9108 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 9109 ret <16 x i8> %res 9110} 9111 9112define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 9113; X86-LABEL: test_mask_subs_epi8_rm_128: 9114; X86: # %bb.0: 9115; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9116; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00] 9117; X86-NEXT: retl # encoding: [0xc3] 9118; 9119; X64-LABEL: test_mask_subs_epi8_rm_128: 9120; X64: # %bb.0: 9121; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07] 9122; X64-NEXT: retq # encoding: [0xc3] 9123 %b = load <16 x i8>, <16 x i8>* %ptr_b 9124 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 9125 ret <16 x i8> %res 9126} 9127 9128define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 9129; X86-LABEL: test_mask_subs_epi8_rmk_128: 9130; X86: # %bb.0: 9131; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9132; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 9133; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08] 9134; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9135; X86-NEXT: retl # encoding: [0xc3] 9136; 9137; X64-LABEL: test_mask_subs_epi8_rmk_128: 9138; X64: # %bb.0: 9139; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9140; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] 9141; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9142; X64-NEXT: retq # encoding: [0xc3] 9143 %b = load <16 x i8>, <16 x i8>* %ptr_b 9144 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 9145 ret <16 x i8> %res 9146} 9147 9148define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 9149; X86-LABEL: test_mask_subs_epi8_rmkz_128: 9150; X86: # %bb.0: 9151; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9152; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 9153; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00] 9154; X86-NEXT: retl # encoding: [0xc3] 9155; 9156; X64-LABEL: test_mask_subs_epi8_rmkz_128: 9157; X64: # %bb.0: 9158; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9159; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] 9160; X64-NEXT: retq # encoding: [0xc3] 9161 %b = load <16 x i8>, <16 x i8>* %ptr_b 9162 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 9163 ret <16 x i8> %res 9164} 9165 9166declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 9167 9168define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 9169; CHECK-LABEL: test_mask_subs_epi8_rr_256: 9170; CHECK: # %bb.0: 9171; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1] 9172; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9173 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 9174 ret <32 x i8> %res 9175} 9176 9177define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 9178; X86-LABEL: test_mask_subs_epi8_rrk_256: 9179; X86: # %bb.0: 9180; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 9181; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 9182; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9183; X86-NEXT: retl # encoding: [0xc3] 9184; 9185; X64-LABEL: test_mask_subs_epi8_rrk_256: 9186; X64: # %bb.0: 9187; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9188; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 9189; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9190; X64-NEXT: retq # encoding: [0xc3] 9191 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 9192 ret <32 x i8> %res 9193} 9194 9195define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 9196; X86-LABEL: test_mask_subs_epi8_rrkz_256: 9197; X86: # %bb.0: 9198; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 9199; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 9200; X86-NEXT: retl # encoding: [0xc3] 9201; 9202; X64-LABEL: test_mask_subs_epi8_rrkz_256: 9203; X64: # %bb.0: 9204; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9205; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 9206; X64-NEXT: retq # encoding: [0xc3] 9207 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 9208 ret <32 x i8> %res 9209} 9210 9211define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 9212; X86-LABEL: test_mask_subs_epi8_rm_256: 9213; X86: # %bb.0: 9214; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9215; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00] 9216; X86-NEXT: retl # encoding: [0xc3] 9217; 9218; X64-LABEL: test_mask_subs_epi8_rm_256: 9219; X64: # %bb.0: 9220; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07] 9221; X64-NEXT: retq # encoding: [0xc3] 9222 %b = load <32 x i8>, <32 x i8>* %ptr_b 9223 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 9224 ret <32 x i8> %res 9225} 9226 9227define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 9228; X86-LABEL: test_mask_subs_epi8_rmk_256: 9229; X86: # %bb.0: 9230; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9231; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 9232; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08] 9233; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9234; X86-NEXT: retl # encoding: [0xc3] 9235; 9236; X64-LABEL: test_mask_subs_epi8_rmk_256: 9237; X64: # %bb.0: 
9238; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9239; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] 9240; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9241; X64-NEXT: retq # encoding: [0xc3] 9242 %b = load <32 x i8>, <32 x i8>* %ptr_b 9243 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 9244 ret <32 x i8> %res 9245} 9246 9247define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 9248; X86-LABEL: test_mask_subs_epi8_rmkz_256: 9249; X86: # %bb.0: 9250; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9251; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 9252; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00] 9253; X86-NEXT: retl # encoding: [0xc3] 9254; 9255; X64-LABEL: test_mask_subs_epi8_rmkz_256: 9256; X64: # %bb.0: 9257; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 9258; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] 9259; X64-NEXT: retq # encoding: [0xc3] 9260 %b = load <32 x i8>, <32 x i8>* %ptr_b 9261 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 9262 ret <32 x i8> %res 9263} 9264 9265declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 9266 9267declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 9268 9269define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 9270; CHECK-LABEL: test_int_x86_avx512_psrav16_hi: 9271; CHECK: # %bb.0: 9272; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1] 9273; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9274 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 9275 ret <16 x i16> %res 9276} 9277 9278define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 9279; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi: 9280; X86: # %bb.0: 9281; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9282; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] 9283; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9284; X86-NEXT: retl # encoding: [0xc3] 9285; 9286; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi: 9287; X64: # %bb.0: 9288; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9289; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] 9290; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9291; X64-NEXT: retq # encoding: [0xc3] 9292 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 9293 ret <16 x i16> %res 9294} 9295 9296define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 9297; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi: 9298; X86: # %bb.0: 9299; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9300; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1] 9301; X86-NEXT: retl # 
encoding: [0xc3] 9302; 9303; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi: 9304; X64: # %bb.0: 9305; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9306; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1] 9307; X64-NEXT: retq # encoding: [0xc3] 9308 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 9309 ret <16 x i16> %res 9310} 9311 9312declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 9313 9314define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 9315; CHECK-LABEL: test_int_x86_avx512_psrav8_hi: 9316; CHECK: # %bb.0: 9317; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1] 9318; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9319 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 9320 ret <8 x i16> %res 9321} 9322 9323define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 9324; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi: 9325; X86: # %bb.0: 9326; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9327; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9328; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] 9329; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9330; X86-NEXT: retl # encoding: [0xc3] 9331; 9332; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi: 9333; X64: # %bb.0: 9334; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9335; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] 9336; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9337; X64-NEXT: retq # encoding: [0xc3] 9338 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 9339 ret <8 x i16> %res 9340} 9341 9342define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 9343; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi: 9344; X86: # %bb.0: 9345; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9346; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9347; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1] 9348; X86-NEXT: retl # encoding: [0xc3] 9349; 9350; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi: 9351; X64: # %bb.0: 9352; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9353; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1] 9354; X64-NEXT: retq # encoding: [0xc3] 9355 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 9356 ret <8 x i16> %res 9357} 9358 9359declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 9360 9361define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 9362; CHECK-LABEL: test_int_x86_avx512_psllv16_hi: 9363; CHECK: # %bb.0: 9364; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1] 9365; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9366 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 9367 ret 
<16 x i16> %res 9368} 9369 9370define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 9371; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi: 9372; X86: # %bb.0: 9373; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9374; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] 9375; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9376; X86-NEXT: retl # encoding: [0xc3] 9377; 9378; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi: 9379; X64: # %bb.0: 9380; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9381; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] 9382; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9383; X64-NEXT: retq # encoding: [0xc3] 9384 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 9385 ret <16 x i16> %res 9386} 9387 9388define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 9389; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi: 9390; X86: # %bb.0: 9391; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9392; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1] 9393; X86-NEXT: retl # encoding: [0xc3] 9394; 9395; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi: 9396; X64: # %bb.0: 9397; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9398; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1] 9399; X64-NEXT: retq # encoding: [0xc3] 9400 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 9401 ret <16 x i16> %res 9402} 9403 9404declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 9405 9406define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 9407; CHECK-LABEL: test_int_x86_avx512_psllv8_hi: 9408; CHECK: # %bb.0: 9409; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1] 9410; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9411 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 9412 ret <8 x i16> %res 9413} 9414 9415define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 9416; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi: 9417; X86: # %bb.0: 9418; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9419; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9420; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] 9421; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9422; X86-NEXT: retl # encoding: [0xc3] 9423; 9424; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi: 9425; X64: # %bb.0: 9426; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9427; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] 9428; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9429; X64-NEXT: retq # encoding: [0xc3] 9430 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 9431 ret <8 x i16> %res 9432} 9433 
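; The maskz variants below pass zeroinitializer as the intrinsic's pass-through
; operand, so lanes whose mask bit is clear are zeroed ({z} in the checked assembly)
; instead of being merged; that is also why these tests need no trailing vmovdqa copy.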
9434define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 9435; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi: 9436; X86: # %bb.0: 9437; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9438; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9439; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1] 9440; X86-NEXT: retl # encoding: [0xc3] 9441; 9442; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi: 9443; X64: # %bb.0: 9444; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9445; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1] 9446; X64-NEXT: retq # encoding: [0xc3] 9447 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 9448 ret <8 x i16> %res 9449} 9450 9451declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 9452 9453define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 9454; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi: 9455; CHECK: # %bb.0: 9456; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1] 9457; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9458 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 9459 ret <16 x i16> %res 9460} 9461 9462define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 9463; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 9464; X86: # %bb.0: 9465; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9466; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 9467; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9468; X86-NEXT: retl # encoding: [0xc3] 9469; 9470; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 9471; X64: # %bb.0: 9472; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9473; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 9474; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9475; X64-NEXT: retq # encoding: [0xc3] 9476 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 9477 ret <16 x i16> %res 9478} 9479 9480define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 9481; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 9482; X86: # %bb.0: 9483; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9484; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 9485; X86-NEXT: retl # encoding: [0xc3] 9486; 9487; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 9488; X64: # %bb.0: 9489; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9490; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 9491; X64-NEXT: retq # encoding: [0xc3] 9492 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 9493 ret <16 x i16> %res 9494} 9495 9496declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 9497 9498define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> 
%x2) { 9499; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi: 9500; CHECK: # %bb.0: 9501; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1] 9502; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9503 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 9504 ret <8 x i16> %res 9505} 9506 9507define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 9508; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 9509; X86: # %bb.0: 9510; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9511; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9512; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 9513; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9514; X86-NEXT: retl # encoding: [0xc3] 9515; 9516; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 9517; X64: # %bb.0: 9518; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9519; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 9520; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9521; X64-NEXT: retq # encoding: [0xc3] 9522 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 9523 ret <8 x i16> %res 9524} 9525 9526define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) { 9527; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi: 9528; X86: # %bb.0: 9529; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9530; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 9531; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1] 9532; X86-NEXT: retl # encoding: [0xc3] 9533; 9534; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi: 9535; X64: # %bb.0: 9536; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9537; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1] 9538; X64-NEXT: retq # encoding: [0xc3] 9539 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 9540 ret <8 x i16> %res 9541} 9542 9543declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16) 9544 9545define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1) { 9546; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256: 9547; CHECK: # %bb.0: 9548; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 9549; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9550; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9551 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 9552 ret <16 x i8> %res 9553} 9554 9555define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 9556; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 9557; X86: # %bb.0: 9558; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9559; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 9560; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9561; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9562; X86-NEXT: retl # encoding: [0xc3] 9563; 9564; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 9565; X64: # %bb.0: 
9566; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9567; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 9568; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9569; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9570; X64-NEXT: retq # encoding: [0xc3] 9571 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 9572 ret <16 x i8> %res 9573} 9574 9575define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) { 9576; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 9577; X86: # %bb.0: 9578; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 9579; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 9580; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9581; X86-NEXT: retl # encoding: [0xc3] 9582; 9583; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 9584; X64: # %bb.0: 9585; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 9586; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 9587; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9588; X64-NEXT: retq # encoding: [0xc3] 9589 %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 9590 ret <16 x i8> %res 9591} 9592