; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)


define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_128:
; X64: # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_256:
; X64: # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)

define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
1176 ret <8 x i16> %3 1177} 1178 1179define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1180; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256: 1181; CHECK: # %bb.0: 1182; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2] 1183; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1184 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 1185 ret <16 x i16> %1 1186} 1187 1188define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1189; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 1190; X86: # %bb.0: 1191; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1192; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 1193; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1194; X86-NEXT: retl # encoding: [0xc3] 1195; 1196; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 1197; X64: # %bb.0: 1198; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1199; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 1200; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1201; X64-NEXT: retq # encoding: [0xc3] 1202 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 1203 %2 = bitcast i16 %x3 to <16 x i1> 1204 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1 1205 ret <16 x i16> %3 1206} 1207 1208define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1209; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 1210; X86: # %bb.0: 1211; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1212; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2] 1213; X86-NEXT: retl # encoding: [0xc3] 1214; 1215; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 1216; X64: # %bb.0: 1217; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1218; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2] 1219; X64-NEXT: retq # encoding: [0xc3] 1220 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 1221 %2 = bitcast i16 %x3 to <16 x i1> 1222 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 1223 ret <16 x i16> %3 1224} 1225 1226declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>) 1227 1228define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1229; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128: 1230; CHECK: # %bb.0: 1231; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2] 1232; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1233 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 1234 ret <8 x i16> %1 1235} 1236 1237define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1238; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 1239; X86: # %bb.0: 1240; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1241; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1242; 
X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 1243; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1244; X86-NEXT: retl # encoding: [0xc3] 1245; 1246; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 1247; X64: # %bb.0: 1248; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1249; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 1250; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1251; X64-NEXT: retq # encoding: [0xc3] 1252 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 1253 %2 = bitcast i8 %x3 to <8 x i1> 1254 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1 1255 ret <8 x i16> %3 1256} 1257 1258declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>) 1259 1260define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1261; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256: 1262; CHECK: # %bb.0: 1263; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2] 1264; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1265 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 1266 ret <16 x i16> %1 1267} 1268 1269define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1270; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 1271; X86: # %bb.0: 1272; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1273; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 1274; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1275; X86-NEXT: retl # encoding: [0xc3] 1276; 1277; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 1278; X64: # %bb.0: 1279; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1280; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 1281; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1282; X64-NEXT: retq # encoding: [0xc3] 1283 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 1284 %2 = bitcast i16 %x3 to <16 x i1> 1285 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1 1286 ret <16 x i16> %3 1287} 1288 1289declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) 1290 1291define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 1292; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128: 1293; X86: # %bb.0: 1294; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1295; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 1296; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1297; X86-NEXT: retl # encoding: [0xc3] 1298; 1299; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128: 1300; X64: # %bb.0: 1301; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1302; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 1303; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1304; X64-NEXT: retq # encoding: [0xc3] 1305 %1 = call <16 x i8> 
@llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1) 1306 %2 = bitcast i16 %x3 to <16 x i1> 1307 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2 1308 ret <16 x i8> %3 1309} 1310 1311declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) 1312 1313define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 1314; X86-LABEL: test_int_x86_avx512_mask_pavg_b_256: 1315; X86: # %bb.0: 1316; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1317; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 1318; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1319; X86-NEXT: retl # encoding: [0xc3] 1320; 1321; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256: 1322; X64: # %bb.0: 1323; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1324; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 1325; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1326; X64-NEXT: retq # encoding: [0xc3] 1327 %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1) 1328 %2 = bitcast i32 %x3 to <32 x i1> 1329 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2 1330 ret <32 x i8> %3 1331} 1332 1333declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) 1334 1335define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1336; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128: 1337; X86: # %bb.0: 1338; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1339; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1340; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 1341; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1342; X86-NEXT: retl # encoding: [0xc3] 1343; 1344; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128: 1345; X64: # %bb.0: 1346; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1347; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 1348; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1349; X64-NEXT: retq # encoding: [0xc3] 1350 %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1) 1351 %2 = bitcast i8 %x3 to <8 x i1> 1352 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1353 ret <8 x i16> %3 1354} 1355 1356declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) 1357 1358define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1359; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256: 1360; X86: # %bb.0: 1361; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1362; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 1363; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1364; X86-NEXT: retl # encoding: [0xc3] 1365; 1366; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256: 1367; X64: # %bb.0: 1368; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1369; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 1370; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1371; X64-NEXT: retq # encoding: [0xc3] 1372 %1 = call <16 x i16> 
@llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1) 1373 %2 = bitcast i16 %x3 to <16 x i1> 1374 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1375 ret <16 x i16> %3 1376} 1377 1378declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) 1379 1380define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1381; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 1382; X86: # %bb.0: 1383; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1384; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1385; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 1386; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1387; X86-NEXT: retl # encoding: [0xc3] 1388; 1389; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 1390; X64: # %bb.0: 1391; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1392; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 1393; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1394; X64-NEXT: retq # encoding: [0xc3] 1395 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1) 1396 %2 = bitcast i8 %x3 to <8 x i1> 1397 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1398 ret <8 x i16> %3 1399} 1400 1401declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) 1402 1403define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1404; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 1405; X86: # %bb.0: 1406; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1407; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 1408; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1409; X86-NEXT: retl # encoding: [0xc3] 1410; 1411; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 1412; X64: # %bb.0: 1413; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1414; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 1415; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1416; X64-NEXT: retq # encoding: [0xc3] 1417 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1) 1418 %2 = bitcast i16 %x3 to <16 x i1> 1419 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1420 ret <16 x i16> %3 1421} 1422 1423declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) 1424 1425define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1426; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 1427; X86: # %bb.0: 1428; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1429; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1430; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 1431; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1432; X86-NEXT: retl # encoding: [0xc3] 1433; 1434; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 1435; X64: # %bb.0: 1436; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1437; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 1438; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6f,0xc2] 1439; X64-NEXT: retq # encoding: [0xc3] 1440 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1) 1441 %2 = bitcast i8 %x3 to <8 x i1> 1442 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1443 ret <8 x i16> %3 1444} 1445 1446declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) 1447 1448define <16 x i16> @test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1449; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 1450; X86: # %bb.0: 1451; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1452; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 1453; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1454; X86-NEXT: retl # encoding: [0xc3] 1455; 1456; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 1457; X64: # %bb.0: 1458; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1459; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 1460; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1461; X64-NEXT: retq # encoding: [0xc3] 1462 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1) 1463 %2 = bitcast i16 %x3 to <16 x i1> 1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1465 ret <16 x i16> %3 1466} 1467 1468declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) 1469 1470define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 1471; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 1472; X86: # %bb.0: 1473; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1474; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1475; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 1476; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1477; X86-NEXT: retl # encoding: [0xc3] 1478; 1479; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 1480; X64: # %bb.0: 1481; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1482; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 1483; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1484; X64-NEXT: retq # encoding: [0xc3] 1485 %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1) 1486 %2 = bitcast i8 %x3 to <8 x i1> 1487 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1488 ret <8 x i16> %3 1489} 1490 1491declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) 1492 1493define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 1494; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 1495; X86: # %bb.0: 1496; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1497; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 1498; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1499; X86-NEXT: retl # encoding: [0xc3] 1500; 1501; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 1502; X64: # %bb.0: 1503; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1504; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 1505; 
X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1506; X64-NEXT: retq # encoding: [0xc3] 1507 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1) 1508 %2 = bitcast i16 %x3 to <16 x i1> 1509 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1510 ret <16 x i16> %3 1511} 1512 1513declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) 1514 1515define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1516; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 1517; X86: # %bb.0: 1518; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1519; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1520; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] 1521; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 1522; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] 1523; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1524; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1525; X86-NEXT: retl # encoding: [0xc3] 1526; 1527; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 1528; X64: # %bb.0: 1529; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1530; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] 1531; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 1532; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] 1533; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1534; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1535; X64-NEXT: retq # encoding: [0xc3] 1536 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1537 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1538 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1539 %res3 = add <16 x i8> %res0, %res1 1540 %res4 = add <16 x i8> %res3, %res2 1541 ret <16 x i8> %res4 1542} 1543 1544declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) 1545 1546define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 1547; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 1548; X86: # %bb.0: 1549; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1550; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1551; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1552; X86-NEXT: vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00] 1553; X86-NEXT: vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00] 1554; X86-NEXT: retl # encoding: [0xc3] 1555; 1556; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 1557; X64: # %bb.0: 1558; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1559; X64-NEXT: vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07] 1560; X64-NEXT: vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07] 1561; X64-NEXT: retq # encoding: [0xc3] 1562 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 1563 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, 
i8 %x2) 1564 ret void 1565} 1566 1567declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) 1568 1569define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1570; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 1571; X86: # %bb.0: 1572; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1573; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1574; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] 1575; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 1576; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] 1577; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1578; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1579; X86-NEXT: retl # encoding: [0xc3] 1580; 1581; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 1582; X64: # %bb.0: 1583; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1584; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] 1585; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 1586; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] 1587; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1588; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1589; X64-NEXT: retq # encoding: [0xc3] 1590 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1591 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1592 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1593 %res3 = add <16 x i8> %res0, %res1 1594 %res4 = add <16 x i8> %res3, %res2 1595 ret <16 x i8> %res4 1596} 1597 1598declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) 1599 1600define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 1601; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 1602; X86: # %bb.0: 1603; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1604; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1605; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1606; X86-NEXT: vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00] 1607; X86-NEXT: vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00] 1608; X86-NEXT: retl # encoding: [0xc3] 1609; 1610; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 1611; X64: # %bb.0: 1612; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1613; X64-NEXT: vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] 1614; X64-NEXT: vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] 1615; X64-NEXT: retq # encoding: [0xc3] 1616 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 1617 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 1618 ret void 1619} 1620 1621declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) 1622 1623define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 1624; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 1625; X86: # 
%bb.0: 1626; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1627; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1628; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] 1629; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 1630; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] 1631; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1632; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1633; X86-NEXT: retl # encoding: [0xc3] 1634; 1635; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 1636; X64: # %bb.0: 1637; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1638; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] 1639; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 1640; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] 1641; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 1642; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 1643; X64-NEXT: retq # encoding: [0xc3] 1644 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 1645 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 1646 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 1647 %res3 = add <16 x i8> %res0, %res1 1648 %res4 = add <16 x i8> %res3, %res2 1649 ret <16 x i8> %res4 1650} 1651 1652declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) 1653 1654define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 1655; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 1656; X86: # %bb.0: 1657; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1658; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1659; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1660; X86-NEXT: vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00] 1661; X86-NEXT: vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00] 1662; X86-NEXT: retl # encoding: [0xc3] 1663; 1664; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 1665; X64: # %bb.0: 1666; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1667; X64-NEXT: vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] 1668; X64-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] 1669; X64-NEXT: retq # encoding: [0xc3] 1670 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 1671 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 1672 ret void 1673} 1674 1675define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0) { 1676; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256: 1677; CHECK: # %bb.0: 1678; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 1679; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1680; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1681 %1 = trunc <16 x i16> %x0 to <16 x i8> 1682 ret <16 x i8> %1 1683} 1684 1685define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1686; 
X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 1687; X86: # %bb.0: 1688; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1689; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 1690; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1691; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1692; X86-NEXT: retl # encoding: [0xc3] 1693; 1694; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 1695; X64: # %bb.0: 1696; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1697; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 1698; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1699; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1700; X64-NEXT: retq # encoding: [0xc3] 1701 %1 = trunc <16 x i16> %x0 to <16 x i8> 1702 %2 = bitcast i16 %x2 to <16 x i1> 1703 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1 1704 ret <16 x i8> %3 1705} 1706 1707define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) { 1708; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 1709; X86: # %bb.0: 1710; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1711; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 1712; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1713; X86-NEXT: retl # encoding: [0xc3] 1714; 1715; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256: 1716; X64: # %bb.0: 1717; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1718; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0] 1719; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1720; X64-NEXT: retq # encoding: [0xc3] 1721 %1 = trunc <16 x i16> %x0 to <16 x i8> 1722 %2 = bitcast i16 %x2 to <16 x i1> 1723 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 1724 ret <16 x i8> %3 1725} 1726 1727declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16) 1728 1729define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 1730; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 1731; X86: # %bb.0: 1732; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1733; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1734; X86-NEXT: vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00] 1735; X86-NEXT: vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00] 1736; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1737; X86-NEXT: retl # encoding: [0xc3] 1738; 1739; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 1740; X64: # %bb.0: 1741; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1742; X64-NEXT: vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07] 1743; X64-NEXT: vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07] 1744; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1745; X64-NEXT: retq # encoding: [0xc3] 1746 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 1747 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 1748 ret void 1749} 1750 1751declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) 1752 1753define <16 x i8>@test_int_x86_avx512_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1) { 1754; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_256: 1755; CHECK: # 
%bb.0: 1756; CHECK-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] 1757; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1758; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1759 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 1760 ret <16 x i8> %res 1761} 1762 1763define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1764; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 1765; X86: # %bb.0: 1766; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1767; X86-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 1768; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1769; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1770; X86-NEXT: retl # encoding: [0xc3] 1771; 1772; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 1773; X64: # %bb.0: 1774; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1775; X64-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 1776; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1777; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1778; X64-NEXT: retq # encoding: [0xc3] 1779 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 1780 ret <16 x i8> %res 1781} 1782 1783define <16 x i8>@test_int_x86_avx512_maskz_pmovs_wb_256(<16 x i16> %x0, i16 %x2) { 1784; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256: 1785; X86: # %bb.0: 1786; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1787; X86-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0] 1788; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1789; X86-NEXT: retl # encoding: [0xc3] 1790; 1791; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256: 1792; X64: # %bb.0: 1793; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1794; X64-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0] 1795; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1796; X64-NEXT: retq # encoding: [0xc3] 1797 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 1798 ret <16 x i8> %res 1799} 1800 1801declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) 1802 1803define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 1804; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 1805; X86: # %bb.0: 1806; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1807; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1808; X86-NEXT: vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00] 1809; X86-NEXT: vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00] 1810; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1811; X86-NEXT: retl # encoding: [0xc3] 1812; 1813; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 1814; X64: # %bb.0: 1815; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1816; X64-NEXT: vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] 1817; X64-NEXT: vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] 1818; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1819; X64-NEXT: retq # encoding: [0xc3] 1820 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 
x i16> %x1, i16 -1) 1821 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 1822 ret void 1823} 1824 1825declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) 1826 1827define <16 x i8>@test_int_x86_avx512_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1828; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_256: 1829; CHECK: # %bb.0: 1830; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0] 1831; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1832; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1833 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 1834 ret <16 x i8> %res 1835} 1836 1837define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 1838; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 1839; X86: # %bb.0: 1840; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1841; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 1842; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1843; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1844; X86-NEXT: retl # encoding: [0xc3] 1845; 1846; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 1847; X64: # %bb.0: 1848; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1849; X64-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 1850; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1851; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1852; X64-NEXT: retq # encoding: [0xc3] 1853 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 1854 ret <16 x i8> %res 1855} 1856 1857define <16 x i8>@test_int_x86_avx512_maskz_pmovus_wb_256(<16 x i16> %x0, i16 %x2) { 1858; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256: 1859; X86: # %bb.0: 1860; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1861; X86-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0] 1862; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1863; X86-NEXT: retl # encoding: [0xc3] 1864; 1865; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256: 1866; X64: # %bb.0: 1867; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1868; X64-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0] 1869; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1870; X64-NEXT: retq # encoding: [0xc3] 1871 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 1872 ret <16 x i8> %res 1873} 1874 1875declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16) 1876 1877define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 1878; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: 1879; X86: # %bb.0: 1880; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1881; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1882; X86-NEXT: vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00] 1883; X86-NEXT: vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00] 1884; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1885; X86-NEXT: retl # encoding: [0xc3] 1886; 1887; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: 1888; X64: 
# %bb.0: 1889; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1890; X64-NEXT: vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] 1891; X64-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07] 1892; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1893; X64-NEXT: retq # encoding: [0xc3] 1894 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 1895 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 1896 ret void 1897} 1898 1899declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) 1900 1901define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { 1902; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 1903; X86: # %bb.0: 1904; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1905; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1906; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 1907; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1908; X86-NEXT: retl # encoding: [0xc3] 1909; 1910; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 1911; X64: # %bb.0: 1912; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1913; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 1914; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1915; X64-NEXT: retq # encoding: [0xc3] 1916 %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1) 1917 %2 = bitcast i8 %x3 to <8 x i1> 1918 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1919 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2 1920 ret <4 x i32> %3 1921} 1922 1923declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) 1924 1925define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 1926; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 1927; X86: # %bb.0: 1928; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1929; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1930; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 1931; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1932; X86-NEXT: retl # encoding: [0xc3] 1933; 1934; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 1935; X64: # %bb.0: 1936; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1937; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 1938; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1939; X64-NEXT: retq # encoding: [0xc3] 1940 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1) 1941 %2 = bitcast i8 %x3 to <8 x i1> 1942 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 1943 ret <8 x i32> %3 1944} 1945 1946declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) 1947 1948define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { 1949; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 1950; X86: # %bb.0: 1951; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1952; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1953; 
X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 1954; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1955; X86-NEXT: retl # encoding: [0xc3] 1956; 1957; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 1958; X64: # %bb.0: 1959; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1960; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 1961; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1962; X64-NEXT: retq # encoding: [0xc3] 1963 %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1) 1964 %2 = bitcast i8 %x3 to <8 x i1> 1965 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 1966 ret <8 x i16> %3 1967} 1968 1969declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) 1970 1971define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { 1972; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 1973; X86: # %bb.0: 1974; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1975; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 1976; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1977; X86-NEXT: retl # encoding: [0xc3] 1978; 1979; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 1980; X64: # %bb.0: 1981; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1982; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 1983; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1984; X64-NEXT: retq # encoding: [0xc3] 1985 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1) 1986 %2 = bitcast i16 %x3 to <16 x i1> 1987 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 1988 ret <16 x i16> %3 1989} 1990 1991declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32) 1992 1993define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 1994; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 1995; X86: # %bb.0: 1996; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1997; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1998; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 1999; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 2000; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] 2001; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 2002; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2003; X86-NEXT: retl # encoding: [0xc3] 2004; 2005; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 2006; X64: # %bb.0: 2007; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2008; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 2009; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] 2010; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] 2011; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xe1,0xfd,0xc0] 2012; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 2013; X64-NEXT: retq # encoding: [0xc3] 2014 %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2) 2015 %2 = bitcast i8 %x4 to <8 x i1> 2016 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 2017 %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3) 2018 %5 = bitcast i8 %x4 to <8 x i1> 2019 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer 2020 %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4) 2021 %res3 = add <8 x i16> %3, %6 2022 %res4 = add <8 x i16> %7, %res3 2023 ret <8 x i16> %res4 2024} 2025 2026declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32) 2027 2028define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { 2029; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 2030; X86: # %bb.0: 2031; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2032; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 2033; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 2034; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] 2035; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 2036; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2037; X86-NEXT: retl # encoding: [0xc3] 2038; 2039; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 2040; X64: # %bb.0: 2041; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2042; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 2043; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] 2044; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] 2045; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 2046; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 2047; X64-NEXT: retq # encoding: [0xc3] 2048 %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2) 2049 %2 = bitcast i16 %x4 to <16 x i1> 2050 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 2051 %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3) 2052 %5 = bitcast i16 %x4 to <16 x i1> 2053 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer 2054 %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4) 2055 %res3 = add <16 x i16> %3, %6 2056 %res4 = add <16 x i16> %res3, %7 2057 ret <16 x i16> %res4 2058} 2059 2060declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2061 2062define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 2063; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi: 2064; CHECK: # %bb.0: 2065; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1] 2066; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2067 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 2068 ret <16 x i16> %res 2069} 2070 2071define 
<16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2072; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 2073; X86: # %bb.0: 2074; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2075; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 2076; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2077; X86-NEXT: retl # encoding: [0xc3] 2078; 2079; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 2080; X64: # %bb.0: 2081; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2082; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 2083; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2084; X64-NEXT: retq # encoding: [0xc3] 2085 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 2086 ret <16 x i16> %res 2087} 2088 2089define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) { 2090; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 2091; X86: # %bb.0: 2092; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2093; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 2094; X86-NEXT: retl # encoding: [0xc3] 2095; 2096; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi: 2097; X64: # %bb.0: 2098; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2099; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] 2100; X64-NEXT: retq # encoding: [0xc3] 2101 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 2102 ret <16 x i16> %res 2103} 2104 2105declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2106 2107define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 2108; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi: 2109; CHECK: # %bb.0: 2110; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1] 2111; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2112 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 2113 ret <8 x i16> %res 2114} 2115 2116define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2117; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 2118; X86: # %bb.0: 2119; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2120; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2121; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 2122; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2123; X86-NEXT: retl # encoding: [0xc3] 2124; 2125; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 2126; X64: # %bb.0: 2127; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2128; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 2129; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2130; X64-NEXT: retq # encoding: [0xc3] 2131 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 2132 ret <8 x i16> %res 2133} 2134 2135define <8 x 
i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  ret <8 x i16> %res
}


define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)

define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)

declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)


define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)



declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}