; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen tests for AVX512BW mask-register intrinsics (kadd, ktestc/ktestz)
; and 512-bit saturating pack intrinsics (packss/packus, masked and
; broadcast-memory forms), checked on both i686 and x86_64 with exact
; instruction encodings. Do not hand-edit the CHECK lines; regenerate them
; with utils/update_llc_test_checks.py.

define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <32 x i16> %A, zeroinitializer
  %1 = icmp ne <32 x i16> %B, zeroinitializer
  %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1)
  %3 = bitcast <32 x i1> %2 to i32
  %4 = icmp eq i32 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>)

define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind {
; X86-LABEL: test_int_x86_avx512_kadd_q:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; X86-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; X86-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; X86-NEXT:    kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1]
; X86-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_kadd_q:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; X64-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; X64-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
; X64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; X64-NEXT:    kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0]
; X64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = icmp ne <64 x i8> %A, zeroinitializer
  %1 = icmp ne <64 x i8> %B, zeroinitializer
  %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1)
  %3 = bitcast <64 x i1> %2 to i64
  %4 = icmp eq i64 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)

define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <32 x i16> %A, zeroinitializer
  %2 = icmp ne <32 x i16> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <32 x i16> %A, zeroinitializer
  %2 = icmp ne <32 x i16> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_q:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <64 x i8> %A, zeroinitializer
  %2 = icmp ne <64 x i8> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_q:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <64 x i8> %A, zeroinitializer
  %2 = icmp ne <64 x i8> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone

define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)

define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)


define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)

define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)

define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  ret <32 x i16> %1
}

define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
  ret <32 x i16> %3
}

define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)

define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 # encoding:
[0x62,0xf2,0xf5,0x48,0x7d,0xc2] 739; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 740 %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) 741 ret <32 x i16> %1 742} 743 744define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 745; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 746; X86: # %bb.0: 747; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 748; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 749; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 750; X86-NEXT: retl # encoding: [0xc3] 751; 752; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 753; X64: # %bb.0: 754; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 755; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 756; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 757; X64-NEXT: retq # encoding: [0xc3] 758 %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) 759 %2 = bitcast i32 %x3 to <32 x i1> 760 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 761 ret <32 x i16> %3 762} 763 764declare <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8>, <64 x i8>) 765 766define <64 x i8> @test_int_x86_avx512_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) { 767; CHECK-LABEL: test_int_x86_avx512_pavg_b_512: 768; CHECK: # %bb.0: 769; CHECK-NEXT: vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1] 770; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 771 %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1) 772 ret <64 x i8> %1 773} 774 775define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 776; X86-LABEL: test_int_x86_avx512_mask_pavg_b_512: 777; X86: 
# %bb.0: 778; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 779; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1] 780; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 781; X86-NEXT: retl # encoding: [0xc3] 782; 783; X64-LABEL: test_int_x86_avx512_mask_pavg_b_512: 784; X64: # %bb.0: 785; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 786; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1] 787; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 788; X64-NEXT: retq # encoding: [0xc3] 789 %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1) 790 %2 = bitcast i64 %x3 to <64 x i1> 791 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2 792 ret <64 x i8> %3 793} 794 795declare <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16>, <32 x i16>) 796 797define <32 x i16> @test_int_x86_avx512_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 798; CHECK-LABEL: test_int_x86_avx512_pavg_w_512: 799; CHECK: # %bb.0: 800; CHECK-NEXT: vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1] 801; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 802 %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1) 803 ret <32 x i16> %1 804} 805 806define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 807; X86-LABEL: test_int_x86_avx512_mask_pavg_w_512: 808; X86: # %bb.0: 809; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 810; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1] 811; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 812; X86-NEXT: retl # encoding: [0xc3] 813; 814; X64-LABEL: test_int_x86_avx512_mask_pavg_w_512: 815; X64: # %bb.0: 816; X64-NEXT: kmovd %edi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xcf] 817; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1] 818; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 819; X64-NEXT: retq # encoding: [0xc3] 820 %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1) 821 %2 = bitcast i32 %x3 to <32 x i1> 822 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 823 ret <32 x i16> %3 824} 825 826declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) 827 828define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) { 829; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512: 830; CHECK: # %bb.0: 831; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1] 832; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 833 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) 834 ret <64 x i8> %res 835} 836 837define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) { 838; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask: 839; X86: # %bb.0: 840; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 841; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 842; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 843; X86-NEXT: retl # encoding: [0xc3] 844; 845; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask: 846; X64: # %bb.0: 847; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 848; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 849; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 850; X64-NEXT: retq # encoding: [0xc3] 851 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) 852 %mask.cast = bitcast i64 %mask to <64 x i1> 853 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x 
i8> %x2 854 ret <64 x i8> %res2 855} 856 857define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) { 858; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: 859; X86: # %bb.0: 860; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 861; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1] 862; X86-NEXT: retl # encoding: [0xc3] 863; 864; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: 865; X64: # %bb.0: 866; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 867; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1] 868; X64-NEXT: retq # encoding: [0xc3] 869 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) 870 %mask.cast = bitcast i64 %mask to <64 x i1> 871 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer 872 ret <64 x i8> %res2 873} 874 875declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>) 876 877define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 878; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512: 879; CHECK: # %bb.0: 880; CHECK-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xc1] 881; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 882 %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) 883 ret <32 x i16> %1 884} 885 886define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 887; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 888; X86: # %bb.0: 889; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 890; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 891; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 892; X86-NEXT: retl # encoding: [0xc3] 893; 
894; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 895; X64: # %bb.0: 896; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 897; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 898; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 899; X64-NEXT: retq # encoding: [0xc3] 900 %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) 901 %2 = bitcast i32 %x3 to <32 x i1> 902 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 903 ret <32 x i16> %3 904} 905 906declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>) 907 908define <32 x i16> @test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 909; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512: 910; CHECK: # %bb.0: 911; CHECK-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1] 912; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 913 %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) 914 ret <32 x i16> %1 915} 916 917define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 918; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 919; X86: # %bb.0: 920; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 921; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 922; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 923; X86-NEXT: retl # encoding: [0xc3] 924; 925; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 926; X64: # %bb.0: 927; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 928; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 929; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 930; X64-NEXT: retq # encoding: [0xc3] 931 %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> 
%x0, <32 x i16> %x1) 932 %2 = bitcast i32 %x3 to <32 x i1> 933 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 934 ret <32 x i16> %3 935} 936 937declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>) 938 939define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) { 940; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512: 941; CHECK: # %bb.0: 942; CHECK-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1] 943; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 944 %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) 945 ret <32 x i16> %1 946} 947 948define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 949; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 950; X86: # %bb.0: 951; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 952; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 953; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 954; X86-NEXT: retl # encoding: [0xc3] 955; 956; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 957; X64: # %bb.0: 958; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 959; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 960; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 961; X64-NEXT: retq # encoding: [0xc3] 962 %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) 963 %2 = bitcast i32 %x3 to <32 x i1> 964 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 965 ret <32 x i16> %3 966} 967 968define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0) { 969; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512: 970; CHECK: # %bb.0: 971; CHECK-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0] 972; CHECK-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 973 %1 = trunc <32 x i16> %x0 to <32 x i8> 974 ret <32 x i8> %1 975} 976 977define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 978; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 979; X86: # %bb.0: 980; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 981; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] 982; X86-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 983; X86-NEXT: retl # encoding: [0xc3] 984; 985; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 986; X64: # %bb.0: 987; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 988; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1] 989; X64-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 990; X64-NEXT: retq # encoding: [0xc3] 991 %1 = trunc <32 x i16> %x0 to <32 x i8> 992 %2 = bitcast i32 %x2 to <32 x i1> 993 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1 994 ret <32 x i8> %3 995} 996 997define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) { 998; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512: 999; X86: # %bb.0: 1000; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1001; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0] 1002; X86-NEXT: retl # encoding: [0xc3] 1003; 1004; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512: 1005; X64: # %bb.0: 1006; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1007; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0] 1008; X64-NEXT: retq # encoding: [0xc3] 1009 %1 = trunc <32 x i16> %x0 to <32 x i8> 1010 %2 = bitcast i32 %x2 to <32 x i1> 1011 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 1012 ret <32 x i8> %3 1013} 1014 1015declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, 
i32) 1016 1017define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 1018; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 1019; X86: # %bb.0: 1020; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1021; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1022; X86-NEXT: vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00] 1023; X86-NEXT: vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00] 1024; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1025; X86-NEXT: retl # encoding: [0xc3] 1026; 1027; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 1028; X64: # %bb.0: 1029; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1030; X64-NEXT: vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07] 1031; X64-NEXT: vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07] 1032; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1033; X64-NEXT: retq # encoding: [0xc3] 1034 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 1035 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 1036 ret void 1037} 1038 1039declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) 1040 1041define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) { 1042; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_512: 1043; CHECK: # %bb.0: 1044; CHECK-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0] 1045; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1046 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 1047 ret <32 x i8> %res 1048} 1049 1050define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 1051; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 1052; X86: # %bb.0: 1053; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1054; X86-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1] 1055; X86-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 1056; X86-NEXT: retl # encoding: [0xc3] 1057; 1058; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 1059; X64: # %bb.0: 1060; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1061; X64-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1] 1062; X64-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 1063; X64-NEXT: retq # encoding: [0xc3] 1064 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 1065 ret <32 x i8> %res 1066} 1067 1068define <32 x i8>@test_int_x86_avx512_maskz_pmovs_wb_512(<32 x i16> %x0, i32 %x2) { 1069; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512: 1070; X86: # %bb.0: 1071; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1072; X86-NEXT: vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0] 1073; X86-NEXT: retl # encoding: [0xc3] 1074; 1075; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512: 1076; X64: # %bb.0: 1077; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1078; X64-NEXT: vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0] 1079; X64-NEXT: retq # encoding: [0xc3] 1080 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 1081 ret <32 x i8> %res 1082} 1083 1084declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) 1085 1086define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 1087; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 1088; X86: # %bb.0: 1089; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1090; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1091; X86-NEXT: vpmovswb 
%zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00] 1092; X86-NEXT: vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00] 1093; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1094; X86-NEXT: retl # encoding: [0xc3] 1095; 1096; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 1097; X64: # %bb.0: 1098; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1099; X64-NEXT: vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07] 1100; X64-NEXT: vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07] 1101; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1102; X64-NEXT: retq # encoding: [0xc3] 1103 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 1104 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 1105 ret void 1106} 1107 1108declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) 1109 1110define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1) { 1111; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_512: 1112; CHECK: # %bb.0: 1113; CHECK-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0] 1114; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1115 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 1116 ret <32 x i8> %res 1117} 1118 1119define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 1120; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 1121; X86: # %bb.0: 1122; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1123; X86-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1] 1124; X86-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 1125; X86-NEXT: retl # encoding: [0xc3] 1126; 1127; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 1128; X64: # %bb.0: 1129; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] 1130; X64-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1] 1131; X64-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1] 1132; X64-NEXT: retq # encoding: [0xc3] 1133 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 1134 ret <32 x i8> %res 1135} 1136 1137define <32 x i8>@test_int_x86_avx512_maskz_pmovus_wb_512(<32 x i16> %x0, i32 %x2) { 1138; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512: 1139; X86: # %bb.0: 1140; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1141; X86-NEXT: vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0] 1142; X86-NEXT: retl # encoding: [0xc3] 1143; 1144; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512: 1145; X64: # %bb.0: 1146; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1147; X64-NEXT: vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0] 1148; X64-NEXT: retq # encoding: [0xc3] 1149 %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 1150 ret <32 x i8> %res 1151} 1152 1153declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) 1154 1155define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 1156; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 1157; X86: # %bb.0: 1158; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1159; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1160; X86-NEXT: vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00] 1161; X86-NEXT: vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00] 1162; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1163; X86-NEXT: retl # encoding: [0xc3] 1164; 1165; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 1166; X64: # %bb.0: 1167; X64-NEXT: 
kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1168; X64-NEXT: vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07] 1169; X64-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07] 1170; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1171; X64-NEXT: retq # encoding: [0xc3] 1172 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 1173 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 1174 ret void 1175} 1176 1177declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) 1178 1179define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1) { 1180; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512: 1181; CHECK: # %bb.0: 1182; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1] 1183; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1184 %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) 1185 ret <32 x i16> %1 1186} 1187 1188define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { 1189; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 1190; X86: # %bb.0: 1191; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1192; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 1193; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1194; X86-NEXT: retl # encoding: [0xc3] 1195; 1196; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 1197; X64: # %bb.0: 1198; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1199; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 1200; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1201; X64-NEXT: retq # encoding: [0xc3] 1202 %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x 
i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}

declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)

; Unmasked pmaddw.d intrinsic: expected to lower directly to vpmaddwd.
define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <16 x i32> %1
}

; Merge masking expressed as bitcast-i16-to-<16 x i1> + select; expects
; vpmaddwd {%k1} into %zmm2 then a blend back into %zmm0.
define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)

; Exercises merge-masked (imm 2), zero-masked (imm 3) and unmasked (imm 4)
; vdbpsadbw in one function, summing the three results with vpaddw.
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3)
  %5 = bitcast i32 %x4 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}

declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

; Two vpsadbw results added with vpaddq. NOTE(review): despite the "mask" in
; the name, this function takes no mask and its CHECK lines use no k-register.
define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
; CHECK-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone

; All-constant operands under optsize: expects a constant-pool vmovdqa64 load
; followed by vpsrlvw with a memory operand (not a fully folded constant).
define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <32 x i16> %res1
}

; Unmasked variable word shift right (logical).
define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrlv32hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked vpsrlvw: i32 mask bitcast to <32 x i1>, blended over %x2.
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}

; Zero-masked vpsrlvw ({%k1} {z}): select against zeroinitializer.
define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>)

; Unmasked variable word shift right (arithmetic).
define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav32_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked vpsravw.
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}

; Zero-masked vpsravw.
define <32 x i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Constant operands for vpsravw: constant-pool load + memory-operand shift.
; NOTE(review): the mask arguments %x2/%x3 are unused and no masking occurs
; here, despite the "mask" in the name.
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>)
  ret <32 x i16> %1
}

; Unmasked variable word shift left.
define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv32hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked vpsllvw.
define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}

; Zero-masked vpsllvw.
define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)

; Unmasked vpermw; note the operand swap: the index vector %x1 is the
; first assembly source (vpermw %zmm0, %zmm1, ...).
define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}

; Merge-masked vpermw.
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}

; Zero-masked vpermw.
define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Word shift left with the count in an XMM (<8 x i16>) operand.
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked vpsllw (xmm count).
define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked vpsllw (xmm count).
define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone


; All-constant operands for vpsllvw under optsize (mirrors the vpsrlvw
; constant test above).
define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
; X86-LABEL: test_x86_avx512_psllv_w_512_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psllv_w_512_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <32 x i16> %res1
}
declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone

; Immediate word shift left ($7).
define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate shift left.
define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate shift left.
define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone


; Arithmetic word shift right with the count in an XMM operand.
define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked vpsraw (xmm count).
define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked vpsraw (xmm count).
define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone


; Immediate arithmetic word shift right ($7).
define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate vpsraw.
define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate vpsraw. NOTE(review): %passthru is unused here (the
; select falls through to zeroinitializer), matching the maskz pattern.
define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone


; Logical word shift right with the count in an XMM operand.
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked vpsrlw (xmm count).
define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked vpsrlw (xmm count).
define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone

; Count loaded from memory: the load folds into vpsrlw's memory operand.
define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, <8 x i16>* %p) {
; X86-LABEL: test_x86_avx512_psrl_w_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsrlw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrl_w_512_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <8 x i16>, <8 x i16>* %p
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}

; Immediate logical word shift right ($7).
define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate vpsrlw.
define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate vpsrlw.
define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone