; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

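; kunpckbw packs the low byte of each source mask register into a single
; 16-bit mask; the test pins down both that lowering and the VEX encodings of
; the mask-register moves around it.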
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 ## encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: unpckbw_test:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

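; The pbroadcast.*.gpr intrinsics splat a general-purpose register across all
; lanes of a zmm register. Each test exercises the unmasked, merge-masked, and
; zero-masked forms and adds the three results so none of them can be dropped.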
define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x4c,0x24,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}
declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)


define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x8c,0x24,0x04,0x00,0x00,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}
declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)


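; broadcast.ss.ps.512 and broadcast.sd.pd.512 splat element 0 of an xmm source
; into every lane of the zmm destination (the register forms of vbroadcastss
; and vbroadcastsd).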
declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_x86_mask_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_x86_maskz_vbroadcast_ss_ps_512(<4 x float> %a0, i16 %mask ) {
; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_mask_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_x86_maskz_vbroadcast_sd_pd_512(<2 x double> %a0, i8 %mask ) {
; X86-LABEL: test_x86_maskz_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

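; Integer broadcasts from an xmm source: the unmasked forms are canonicalized
; to the floating-point vbroadcastss/vbroadcastsd, while the masked forms use
; the integer vpbroadcastd/vpbroadcastq.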
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pbroadcastd_512(<4 x i32> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pbroadcastq_512(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  ret <8 x i64> %res
}

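; movsldup/movshdup duplicate the even/odd single-precision lanes and movddup
; duplicates the even double-precision lanes; the shuffle comments below spell
; out the expected lane pattern for each masked variant.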
declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_movsldup_512(<16 x float> %x0, <16 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movsldup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsldup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_movsldup_512(<16 x float> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_movshdup_512(<16 x float> %x0, <16 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movshdup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovshdup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_movshdup_512(<16 x float> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_movddup_512(<8 x double> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movddup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovddup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_movddup_512(<8 x double> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  ret <8 x double> %res
}

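; perm.df/perm.di are immediate-controlled 64-bit element permutes; immediate
; 3 selects element 3 into lane 0 within each 256-bit half, giving the
; [3,0,0,0,7,4,4,4] pattern in the shuffle comments.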
declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_perm_df_512(<8 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_df_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xc0,0x03]
; CHECK-NEXT:    ## zmm0 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_perm_df_512(<8 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_di_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xc0,0x03]
; CHECK-NEXT:    ## zmm0 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_perm_di_512(<8 x i64> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

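; Masked store intrinsics: the first call stores with the supplied mask, the
; second passes an all-ones mask and must fold to a plain unmasked store.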
define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_store1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovups %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x01]
; X86-NEXT:    vmovups %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store1:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
; X64-NEXT:    vmovups %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_store2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x01]
; X86-NEXT:    vmovupd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store2:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
; X64-NEXT:    vmovupd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_mask_store_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovaps %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x01]
; X86-NEXT:    vmovaps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vmovaps %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_mask_store_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x01]
; X86-NEXT:    vmovapd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x07]
; X64-NEXT:    vmovapd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqa32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

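; Masked load intrinsics: each test performs an unmasked load, a merge-masked
; load over that result, and a zero-masked load, then adds the last two so
; both masked forms stay live.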
define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovups (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

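; vpermilpd/vpermilps shuffle elements within each 128-bit lane under an
; immediate control; the expected per-lane patterns for immediate 22 appear in
; the shuffle comments.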
declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilpd $22, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xc0,0x16]
; CHECK-NEXT:    ## zmm0 = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vpermil_pd_512(<8 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilps $22, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x16]
; CHECK-NEXT:    ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1049; X64-NEXT:    retq ## encoding: [0xc3]
1050  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
1051  ret <16 x float> %res
1052}
1053
1054define <16 x float>@test_int_x86_avx512_maskz_vpermil_ps_512(<16 x float> %x0, i16 %x3) {
1055; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_512:
1056; X86:       ## %bb.0:
1057; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1058; X86-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
1059; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
1060; X86-NEXT:    retl ## encoding: [0xc3]
1061;
1062; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_512:
1063; X64:       ## %bb.0:
1064; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1065; X64-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
1066; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
1067; X64-NEXT:    retq ## encoding: [0xc3]
1068  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
1069  ret <16 x float> %res
1070}
1071
1072declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)
1073
1074define <16 x i32>@test_int_x86_avx512_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2) {
1075; CHECK-LABEL: test_int_x86_avx512_pshuf_d_512:
1076; CHECK:       ## %bb.0:
1077; CHECK-NEXT:    vpermilps $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x03]
1078; CHECK-NEXT:    ## zmm0 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
1079; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1080  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
1081  ret <16 x i32> %res
1082}
1083
1084define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
1085; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
1086; X86:       ## %bb.0:
1087; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1088; X86-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
1089; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
1090; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1091; X86-NEXT:    retl ## encoding: [0xc3]
1092;
1093; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
1094; X64:       ## %bb.0:
1095; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1096; X64-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
1097; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
1098; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1099; X64-NEXT:    retq ## encoding: [0xc3]
1100  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
1101  ret <16 x i32> %res
1102}
1103
1104define <16 x i32>@test_int_x86_avx512_maskz_pshuf_d_512(<16 x i32> %x0, i32 %x1, i16 %x3) {
1105; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_512:
1106; X86:       ## %bb.0:
1107; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1108; X86-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
1109; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
1110; X86-NEXT:    retl ## encoding: [0xc3]
1111;
1112; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_512:
1113; X64:       ## %bb.0:
1114; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1115; X64-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
1116; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
1117; X64-NEXT:    retq ## encoding: [0xc3]
1118  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
1119  ret <16 x i32> %res
1120}
1121
1122define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
1123; CHECK-LABEL: test_pcmpeq_d:
1124; CHECK:       ## %bb.0:
1125; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
1126; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1127; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
1128; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1129; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1130  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1131  ret i16 %res
}

define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax ## encoding: [0x21,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpgt_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %edi, %eax ## encoding: [0x21,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)

declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpckhps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpcklps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_punpcklqd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_punpcklqd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpckhps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckld_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vunpcklps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xc1]
; CHECK-NEXT:    ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)

define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
; X86-LABEL: test_storent_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
  ret void
}

declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)

define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
; X86-LABEL: test_storent_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
  ret void
}

declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)

define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
; X86-LABEL: test_storent_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
  ret void
}

define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1883  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1884  ret < 16 x i32> %res
1885}
1886
1887define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1888; X86-LABEL: test_mask_xor_epi32:
1889; X86:       ## %bb.0:
1890; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1891; X86-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1892; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1893; X86-NEXT:    retl ## encoding: [0xc3]
1894;
1895; X64-LABEL: test_mask_xor_epi32:
1896; X64:       ## %bb.0:
1897; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1898; X64-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1899; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1900; X64-NEXT:    retq ## encoding: [0xc3]
1901  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1902  ret < 16 x i32> %res
1903}
1904
1905declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1906
1907define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1908; CHECK-LABEL: test_or_epi32:
1909; CHECK:       ## %bb.0:
1910; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1911; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1912  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1913  ret < 16 x i32> %res
1914}
1915
1916define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1917; X86-LABEL: test_mask_or_epi32:
1918; X86:       ## %bb.0:
1919; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1920; X86-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1921; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1922; X86-NEXT:    retl ## encoding: [0xc3]
1923;
1924; X64-LABEL: test_mask_or_epi32:
1925; X64:       ## %bb.0:
1926; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1927; X64-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1928; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1929; X64-NEXT:    retq ## encoding: [0xc3]
1930  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1931  ret < 16 x i32> %res
1932}
1933
1934declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1935
1936define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1937; CHECK-LABEL: test_and_epi32:
1938; CHECK:       ## %bb.0:
1939; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1940; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1941  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1942  ret < 16 x i32> %res
1943}
1944
1945define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1946; X86-LABEL: test_mask_and_epi32:
1947; X86:       ## %bb.0:
1948; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1949; X86-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1950; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1951; X86-NEXT:    retl ## encoding: [0xc3]
1952;
1953; X64-LABEL: test_mask_and_epi32:
1954; X64:       ## %bb.0:
1955; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1956; X64-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1957; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1958; X64-NEXT:    retq ## encoding: [0xc3]
1959  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1960  ret < 16 x i32> %res
1961}
1962
1963declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1964
1965define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1966; CHECK-LABEL: test_xor_epi64:
1967; CHECK:       ## %bb.0:
1968; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1969; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
1970  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1971  ret < 8 x i64> %res
1972}
1973
1974define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1975; X86-LABEL: test_mask_xor_epi64:
1976; X86:       ## %bb.0:
1977; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1978; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1979; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1980; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1981; X86-NEXT:    retl ## encoding: [0xc3]
1982;
1983; X64-LABEL: test_mask_xor_epi64:
1984; X64:       ## %bb.0:
1985; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1986; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1987; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1988; X64-NEXT:    retq ## encoding: [0xc3]
1989  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1990  ret < 8 x i64> %res
1991}
1992
1993declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1994
1995define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1996; CHECK-LABEL: test_or_epi64:
1997; CHECK:       ## %bb.0:
1998; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1999; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2000  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2001  ret <8 x i64> %res
2002}
2003
2004define <8 x i64> @test_mask_or_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2005; X86-LABEL: test_mask_or_epi64:
2006; X86:       ## %bb.0:
2007; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2008; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2009; X86-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
2010; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2011; X86-NEXT:    retl ## encoding: [0xc3]
2012;
2013; X64-LABEL: test_mask_or_epi64:
2014; X64:       ## %bb.0:
2015; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2016; X64-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
2017; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2018; X64-NEXT:    retq ## encoding: [0xc3]
2019  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2020  ret <8 x i64> %res
2021}
2022
2023declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2024
2025define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
2026; CHECK-LABEL: test_and_epi64:
2027; CHECK:       ## %bb.0:
2028; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
2029; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2030  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2031  ret <8 x i64> %res
2032}
2033
2034define <8 x i64> @test_mask_and_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2035; X86-LABEL: test_mask_and_epi64:
2036; X86:       ## %bb.0:
2037; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2038; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2039; X86-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
2040; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2041; X86-NEXT:    retl ## encoding: [0xc3]
2042;
2043; X64-LABEL: test_mask_and_epi64:
2044; X64:       ## %bb.0:
2045; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2046; X64-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
2047; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2048; X64-NEXT:    retq ## encoding: [0xc3]
2049  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2050  ret <8 x i64> %res
2051}
2052
2053declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2054
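; The arithmetic tests below follow a naming convention: rr = reg/reg,
; rm = reg/mem, rmb = broadcast from a scalar ({1to16}/{1to8}); a trailing
; k merges into %passThru under %k1, and kz selects zero-masking ({z}).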
2055define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2056; CHECK-LABEL: test_mask_add_epi32_rr:
2057; CHECK:       ## %bb.0:
2058; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
2059; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2060  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2061  ret <16 x i32> %res
2062}
2063
2064define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2065; X86-LABEL: test_mask_add_epi32_rrk:
2066; X86:       ## %bb.0:
2067; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2068; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
2069; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2070; X86-NEXT:    retl ## encoding: [0xc3]
2071;
2072; X64-LABEL: test_mask_add_epi32_rrk:
2073; X64:       ## %bb.0:
2074; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2075; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
2076; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2077; X64-NEXT:    retq ## encoding: [0xc3]
2078  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2079  ret <16 x i32> %res
2080}
2081
2082define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2083; X86-LABEL: test_mask_add_epi32_rrkz:
2084; X86:       ## %bb.0:
2085; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2086; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
2087; X86-NEXT:    retl ## encoding: [0xc3]
2088;
2089; X64-LABEL: test_mask_add_epi32_rrkz:
2090; X64:       ## %bb.0:
2091; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2092; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
2093; X64-NEXT:    retq ## encoding: [0xc3]
2094  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2095  ret <16 x i32> %res
2096}
2097
2098define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2099; X86-LABEL: test_mask_add_epi32_rm:
2100; X86:       ## %bb.0:
2101; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2102; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x00]
2103; X86-NEXT:    retl ## encoding: [0xc3]
2104;
2105; X64-LABEL: test_mask_add_epi32_rm:
2106; X64:       ## %bb.0:
2107; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
2108; X64-NEXT:    retq ## encoding: [0xc3]
2109  %b = load <16 x i32>, <16 x i32>* %ptr_b
2110  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2111  ret <16 x i32> %res
2112}
2113
2114define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2115; X86-LABEL: test_mask_add_epi32_rmk:
2116; X86:       ## %bb.0:
2117; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2118; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2119; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x08]
2120; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2121; X86-NEXT:    retl ## encoding: [0xc3]
2122;
2123; X64-LABEL: test_mask_add_epi32_rmk:
2124; X64:       ## %bb.0:
2125; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2126; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
2127; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2128; X64-NEXT:    retq ## encoding: [0xc3]
2129  %b = load <16 x i32>, <16 x i32>* %ptr_b
2130  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2131  ret <16 x i32> %res
2132}
2133
2134define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2135; X86-LABEL: test_mask_add_epi32_rmkz:
2136; X86:       ## %bb.0:
2137; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2138; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2139; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x00]
2140; X86-NEXT:    retl ## encoding: [0xc3]
2141;
2142; X64-LABEL: test_mask_add_epi32_rmkz:
2143; X64:       ## %bb.0:
2144; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2145; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
2146; X64-NEXT:    retq ## encoding: [0xc3]
2147  %b = load <16 x i32>, <16 x i32>* %ptr_b
2148  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2149  ret <16 x i32> %res
2150}
2151
2152define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2153; X86-LABEL: test_mask_add_epi32_rmb:
2154; X86:       ## %bb.0:
2155; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2156; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x00]
2157; X86-NEXT:    retl ## encoding: [0xc3]
2158;
2159; X64-LABEL: test_mask_add_epi32_rmb:
2160; X64:       ## %bb.0:
2161; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
2162; X64-NEXT:    retq ## encoding: [0xc3]
2163  %q = load i32, i32* %ptr_b
2164  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2165  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2166  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2167  ret <16 x i32> %res
2168}
2169
2170define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2171; X86-LABEL: test_mask_add_epi32_rmbk:
2172; X86:       ## %bb.0:
2173; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2174; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2175; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x08]
2176; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2177; X86-NEXT:    retl ## encoding: [0xc3]
2178;
2179; X64-LABEL: test_mask_add_epi32_rmbk:
2180; X64:       ## %bb.0:
2181; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2182; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
2183; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2184; X64-NEXT:    retq ## encoding: [0xc3]
2185  %q = load i32, i32* %ptr_b
2186  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2187  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2188  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2189  ret <16 x i32> %res
2190}
2191
2192define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2193; X86-LABEL: test_mask_add_epi32_rmbkz:
2194; X86:       ## %bb.0:
2195; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2196; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2197; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x00]
2198; X86-NEXT:    retl ## encoding: [0xc3]
2199;
2200; X64-LABEL: test_mask_add_epi32_rmbkz:
2201; X64:       ## %bb.0:
2202; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2203; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
2204; X64-NEXT:    retq ## encoding: [0xc3]
2205  %q = load i32, i32* %ptr_b
2206  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2207  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2208  %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2209  ret <16 x i32> %res
2210}
2211
2212declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2213
2214define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2215; CHECK-LABEL: test_mask_sub_epi32_rr:
2216; CHECK:       ## %bb.0:
2217; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
2218; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2219  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2220  ret <16 x i32> %res
2221}
2222
2223define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2224; X86-LABEL: test_mask_sub_epi32_rrk:
2225; X86:       ## %bb.0:
2226; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2227; X86-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
2228; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2229; X86-NEXT:    retl ## encoding: [0xc3]
2230;
2231; X64-LABEL: test_mask_sub_epi32_rrk:
2232; X64:       ## %bb.0:
2233; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2234; X64-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
2235; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2236; X64-NEXT:    retq ## encoding: [0xc3]
2237  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2238  ret <16 x i32> %res
2239}
2240
2241define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2242; X86-LABEL: test_mask_sub_epi32_rrkz:
2243; X86:       ## %bb.0:
2244; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2245; X86-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
2246; X86-NEXT:    retl ## encoding: [0xc3]
2247;
2248; X64-LABEL: test_mask_sub_epi32_rrkz:
2249; X64:       ## %bb.0:
2250; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2251; X64-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
2252; X64-NEXT:    retq ## encoding: [0xc3]
2253  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2254  ret <16 x i32> %res
2255}
2256
2257define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2258; X86-LABEL: test_mask_sub_epi32_rm:
2259; X86:       ## %bb.0:
2260; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2261; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00]
2262; X86-NEXT:    retl ## encoding: [0xc3]
2263;
2264; X64-LABEL: test_mask_sub_epi32_rm:
2265; X64:       ## %bb.0:
2266; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
2267; X64-NEXT:    retq ## encoding: [0xc3]
2268  %b = load <16 x i32>, <16 x i32>* %ptr_b
2269  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2270  ret <16 x i32> %res
2271}
2272
2273define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2274; X86-LABEL: test_mask_sub_epi32_rmk:
2275; X86:       ## %bb.0:
2276; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2277; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2278; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x08]
2279; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2280; X86-NEXT:    retl ## encoding: [0xc3]
2281;
2282; X64-LABEL: test_mask_sub_epi32_rmk:
2283; X64:       ## %bb.0:
2284; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2285; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
2286; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2287; X64-NEXT:    retq ## encoding: [0xc3]
2288  %b = load <16 x i32>, <16 x i32>* %ptr_b
2289  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2290  ret <16 x i32> %res
2291}
2292
2293define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2294; X86-LABEL: test_mask_sub_epi32_rmkz:
2295; X86:       ## %bb.0:
2296; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2297; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2298; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x00]
2299; X86-NEXT:    retl ## encoding: [0xc3]
2300;
2301; X64-LABEL: test_mask_sub_epi32_rmkz:
2302; X64:       ## %bb.0:
2303; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2304; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
2305; X64-NEXT:    retq ## encoding: [0xc3]
2306  %b = load <16 x i32>, <16 x i32>* %ptr_b
2307  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2308  ret <16 x i32> %res
2309}
2310
2311define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2312; X86-LABEL: test_mask_sub_epi32_rmb:
2313; X86:       ## %bb.0:
2314; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2315; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x00]
2316; X86-NEXT:    retl ## encoding: [0xc3]
2317;
2318; X64-LABEL: test_mask_sub_epi32_rmb:
2319; X64:       ## %bb.0:
2320; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
2321; X64-NEXT:    retq ## encoding: [0xc3]
2322  %q = load i32, i32* %ptr_b
2323  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2324  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2325  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2326  ret <16 x i32> %res
2327}
2328
2329define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2330; X86-LABEL: test_mask_sub_epi32_rmbk:
2331; X86:       ## %bb.0:
2332; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2333; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2334; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x08]
2335; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2336; X86-NEXT:    retl ## encoding: [0xc3]
2337;
2338; X64-LABEL: test_mask_sub_epi32_rmbk:
2339; X64:       ## %bb.0:
2340; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2341; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
2342; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2343; X64-NEXT:    retq ## encoding: [0xc3]
2344  %q = load i32, i32* %ptr_b
2345  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2346  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2347  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2348  ret <16 x i32> %res
2349}
2350
2351define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2352; X86-LABEL: test_mask_sub_epi32_rmbkz:
2353; X86:       ## %bb.0:
2354; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2355; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2356; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x00]
2357; X86-NEXT:    retl ## encoding: [0xc3]
2358;
2359; X64-LABEL: test_mask_sub_epi32_rmbkz:
2360; X64:       ## %bb.0:
2361; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2362; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
2363; X64-NEXT:    retq ## encoding: [0xc3]
2364  %q = load i32, i32* %ptr_b
2365  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2366  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2367  %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2368  ret <16 x i32> %res
2369}
2370
2371declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2372
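; The 64-bit add/sub tests repeat the same matrix of forms; note that on
; X86 the i8 mask is zero-extended with movzbl before the kmovw into %k1.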
2373define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2374; CHECK-LABEL: test_mask_add_epi64_rr:
2375; CHECK:       ## %bb.0:
2376; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
2377; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2378  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2379  ret <8 x i64> %res
2380}
2381
2382define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2383; X86-LABEL: test_mask_add_epi64_rrk:
2384; X86:       ## %bb.0:
2385; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2386; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2387; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
2388; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2389; X86-NEXT:    retl ## encoding: [0xc3]
2390;
2391; X64-LABEL: test_mask_add_epi64_rrk:
2392; X64:       ## %bb.0:
2393; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2394; X64-NEXT:    vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
2395; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2396; X64-NEXT:    retq ## encoding: [0xc3]
2397  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2398  ret <8 x i64> %res
2399}
2400
2401define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2402; X86-LABEL: test_mask_add_epi64_rrkz:
2403; X86:       ## %bb.0:
2404; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2405; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2406; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
2407; X86-NEXT:    retl ## encoding: [0xc3]
2408;
2409; X64-LABEL: test_mask_add_epi64_rrkz:
2410; X64:       ## %bb.0:
2411; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2412; X64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
2413; X64-NEXT:    retq ## encoding: [0xc3]
2414  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2415  ret <8 x i64> %res
2416}
2417
2418define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2419; X86-LABEL: test_mask_add_epi64_rm:
2420; X86:       ## %bb.0:
2421; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2422; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x00]
2423; X86-NEXT:    retl ## encoding: [0xc3]
2424;
2425; X64-LABEL: test_mask_add_epi64_rm:
2426; X64:       ## %bb.0:
2427; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
2428; X64-NEXT:    retq ## encoding: [0xc3]
2429  %b = load <8 x i64>, <8 x i64>* %ptr_b
2430  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2431  ret <8 x i64> %res
2432}
2433
2434define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2435; X86-LABEL: test_mask_add_epi64_rmk:
2436; X86:       ## %bb.0:
2437; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2438; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2439; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2440; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x08]
2441; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2442; X86-NEXT:    retl ## encoding: [0xc3]
2443;
2444; X64-LABEL: test_mask_add_epi64_rmk:
2445; X64:       ## %bb.0:
2446; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2447; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
2448; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2449; X64-NEXT:    retq ## encoding: [0xc3]
2450  %b = load <8 x i64>, <8 x i64>* %ptr_b
2451  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2452  ret <8 x i64> %res
2453}
2454
2455define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2456; X86-LABEL: test_mask_add_epi64_rmkz:
2457; X86:       ## %bb.0:
2458; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2459; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2460; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2461; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x00]
2462; X86-NEXT:    retl ## encoding: [0xc3]
2463;
2464; X64-LABEL: test_mask_add_epi64_rmkz:
2465; X64:       ## %bb.0:
2466; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2467; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
2468; X64-NEXT:    retq ## encoding: [0xc3]
2469  %b = load <8 x i64>, <8 x i64>* %ptr_b
2470  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2471  ret <8 x i64> %res
2472}
2473
2474define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2475; X86-LABEL: test_mask_add_epi64_rmb:
2476; X86:       ## %bb.0:
2477; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2478; X86-NEXT:    vpaddq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x00]
2479; X86-NEXT:    retl ## encoding: [0xc3]
2480;
2481; X64-LABEL: test_mask_add_epi64_rmb:
2482; X64:       ## %bb.0:
2483; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
2484; X64-NEXT:    retq ## encoding: [0xc3]
2485  %q = load i64, i64* %ptr_b
2486  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2487  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2488  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2489  ret <8 x i64> %res
2490}
2491
2492define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2493; X86-LABEL: test_mask_add_epi64_rmbk:
2494; X86:       ## %bb.0:
2495; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2496; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2497; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2498; X86-NEXT:    vpaddq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x08]
2499; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2500; X86-NEXT:    retl ## encoding: [0xc3]
2501;
2502; X64-LABEL: test_mask_add_epi64_rmbk:
2503; X64:       ## %bb.0:
2504; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2505; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
2506; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2507; X64-NEXT:    retq ## encoding: [0xc3]
2508  %q = load i64, i64* %ptr_b
2509  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2510  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2511  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2512  ret <8 x i64> %res
2513}
2514
2515define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2516; X86-LABEL: test_mask_add_epi64_rmbkz:
2517; X86:       ## %bb.0:
2518; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2519; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2520; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2521; X86-NEXT:    vpaddq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x00]
2522; X86-NEXT:    retl ## encoding: [0xc3]
2523;
2524; X64-LABEL: test_mask_add_epi64_rmbkz:
2525; X64:       ## %bb.0:
2526; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2527; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
2528; X64-NEXT:    retq ## encoding: [0xc3]
2529  %q = load i64, i64* %ptr_b
2530  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2531  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2532  %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2533  ret <8 x i64> %res
2534}
2535
2536declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2537
2538define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2539; CHECK-LABEL: test_mask_sub_epi64_rr:
2540; CHECK:       ## %bb.0:
2541; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
2542; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2543  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2544  ret <8 x i64> %res
2545}
2546
2547define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2548; X86-LABEL: test_mask_sub_epi64_rrk:
2549; X86:       ## %bb.0:
2550; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2551; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2552; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2553; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2554; X86-NEXT:    retl ## encoding: [0xc3]
2555;
2556; X64-LABEL: test_mask_sub_epi64_rrk:
2557; X64:       ## %bb.0:
2558; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2559; X64-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2560; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2561; X64-NEXT:    retq ## encoding: [0xc3]
2562  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2563  ret <8 x i64> %res
2564}
2565
2566define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2567; X86-LABEL: test_mask_sub_epi64_rrkz:
2568; X86:       ## %bb.0:
2569; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2570; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2571; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
2572; X86-NEXT:    retl ## encoding: [0xc3]
2573;
2574; X64-LABEL: test_mask_sub_epi64_rrkz:
2575; X64:       ## %bb.0:
2576; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2577; X64-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
2578; X64-NEXT:    retq ## encoding: [0xc3]
2579  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2580  ret <8 x i64> %res
2581}
2582
2583define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2584; X86-LABEL: test_mask_sub_epi64_rm:
2585; X86:       ## %bb.0:
2586; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2587; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x00]
2588; X86-NEXT:    retl ## encoding: [0xc3]
2589;
2590; X64-LABEL: test_mask_sub_epi64_rm:
2591; X64:       ## %bb.0:
2592; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
2593; X64-NEXT:    retq ## encoding: [0xc3]
2594  %b = load <8 x i64>, <8 x i64>* %ptr_b
2595  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2596  ret <8 x i64> %res
2597}
2598
2599define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2600; X86-LABEL: test_mask_sub_epi64_rmk:
2601; X86:       ## %bb.0:
2602; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2603; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2604; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2605; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x08]
2606; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2607; X86-NEXT:    retl ## encoding: [0xc3]
2608;
2609; X64-LABEL: test_mask_sub_epi64_rmk:
2610; X64:       ## %bb.0:
2611; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2612; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
2613; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2614; X64-NEXT:    retq ## encoding: [0xc3]
2615  %b = load <8 x i64>, <8 x i64>* %ptr_b
2616  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2617  ret <8 x i64> %res
2618}
2619
2620define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2621; X86-LABEL: test_mask_sub_epi64_rmkz:
2622; X86:       ## %bb.0:
2623; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2624; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2625; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2626; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x00]
2627; X86-NEXT:    retl ## encoding: [0xc3]
2628;
2629; X64-LABEL: test_mask_sub_epi64_rmkz:
2630; X64:       ## %bb.0:
2631; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2632; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
2633; X64-NEXT:    retq ## encoding: [0xc3]
2634  %b = load <8 x i64>, <8 x i64>* %ptr_b
2635  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2636  ret <8 x i64> %res
2637}
2638
2639define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2640; X86-LABEL: test_mask_sub_epi64_rmb:
2641; X86:       ## %bb.0:
2642; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2643; X86-NEXT:    vpsubq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x00]
2644; X86-NEXT:    retl ## encoding: [0xc3]
2645;
2646; X64-LABEL: test_mask_sub_epi64_rmb:
2647; X64:       ## %bb.0:
2648; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
2649; X64-NEXT:    retq ## encoding: [0xc3]
2650  %q = load i64, i64* %ptr_b
2651  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2652  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2653  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2654  ret <8 x i64> %res
2655}
2656
2657define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2658; X86-LABEL: test_mask_sub_epi64_rmbk:
2659; X86:       ## %bb.0:
2660; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2661; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2662; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2663; X86-NEXT:    vpsubq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x08]
2664; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2665; X86-NEXT:    retl ## encoding: [0xc3]
2666;
2667; X64-LABEL: test_mask_sub_epi64_rmbk:
2668; X64:       ## %bb.0:
2669; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2670; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
2671; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2672; X64-NEXT:    retq ## encoding: [0xc3]
2673  %q = load i64, i64* %ptr_b
2674  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2675  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2676  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2677  ret <8 x i64> %res
2678}
2679
2680define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2681; X86-LABEL: test_mask_sub_epi64_rmbkz:
2682; X86:       ## %bb.0:
2683; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2684; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2685; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2686; X86-NEXT:    vpsubq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x00]
2687; X86-NEXT:    retl ## encoding: [0xc3]
2688;
2689; X64-LABEL: test_mask_sub_epi64_rmbkz:
2690; X64:       ## %bb.0:
2691; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2692; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
2693; X64-NEXT:    retq ## encoding: [0xc3]
2694  %q = load i64, i64* %ptr_b
2695  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2696  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2697  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2698  ret <8 x i64> %res
2699}
2700
2701declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2702
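; vpmulld (low 32-bit multiply) covers the same rr/rm/rmb matrix of forms.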
2703define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2704; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
2705; CHECK:       ## %bb.0:
2706; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2707; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2708  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2709  ret <16 x i32> %res
2710}
2711
2712define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2713; X86-LABEL: test_mask_mullo_epi32_rrk_512:
2714; X86:       ## %bb.0:
2715; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2716; X86-NEXT:    vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2717; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2718; X86-NEXT:    retl ## encoding: [0xc3]
2719;
2720; X64-LABEL: test_mask_mullo_epi32_rrk_512:
2721; X64:       ## %bb.0:
2722; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2723; X64-NEXT:    vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2724; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2725; X64-NEXT:    retq ## encoding: [0xc3]
2726  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2727  ret <16 x i32> %res
2728}
2729
2730define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2731; X86-LABEL: test_mask_mullo_epi32_rrkz_512:
2732; X86:       ## %bb.0:
2733; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2734; X86-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2735; X86-NEXT:    retl ## encoding: [0xc3]
2736;
2737; X64-LABEL: test_mask_mullo_epi32_rrkz_512:
2738; X64:       ## %bb.0:
2739; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2740; X64-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2741; X64-NEXT:    retq ## encoding: [0xc3]
2742  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2743  ret <16 x i32> %res
2744}
2745
2746define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2747; X86-LABEL: test_mask_mullo_epi32_rm_512:
2748; X86:       ## %bb.0:
2749; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2750; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x00]
2751; X86-NEXT:    retl ## encoding: [0xc3]
2752;
2753; X64-LABEL: test_mask_mullo_epi32_rm_512:
2754; X64:       ## %bb.0:
2755; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2756; X64-NEXT:    retq ## encoding: [0xc3]
2757  %b = load <16 x i32>, <16 x i32>* %ptr_b
2758  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2759  ret <16 x i32> %res
2760}
2761
2762define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2763; X86-LABEL: test_mask_mullo_epi32_rmk_512:
2764; X86:       ## %bb.0:
2765; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2766; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2767; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x08]
2768; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2769; X86-NEXT:    retl ## encoding: [0xc3]
2770;
2771; X64-LABEL: test_mask_mullo_epi32_rmk_512:
2772; X64:       ## %bb.0:
2773; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2774; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2775; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2776; X64-NEXT:    retq ## encoding: [0xc3]
2777  %b = load <16 x i32>, <16 x i32>* %ptr_b
2778  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2779  ret <16 x i32> %res
2780}
2781
2782define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2783; X86-LABEL: test_mask_mullo_epi32_rmkz_512:
2784; X86:       ## %bb.0:
2785; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2786; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2787; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x00]
2788; X86-NEXT:    retl ## encoding: [0xc3]
2789;
2790; X64-LABEL: test_mask_mullo_epi32_rmkz_512:
2791; X64:       ## %bb.0:
2792; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2793; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2794; X64-NEXT:    retq ## encoding: [0xc3]
2795  %b = load <16 x i32>, <16 x i32>* %ptr_b
2796  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2797  ret <16 x i32> %res
2798}
2799
2800define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2801; X86-LABEL: test_mask_mullo_epi32_rmb_512:
2802; X86:       ## %bb.0:
2803; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2804; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x00]
2805; X86-NEXT:    retl ## encoding: [0xc3]
2806;
2807; X64-LABEL: test_mask_mullo_epi32_rmb_512:
2808; X64:       ## %bb.0:
2809; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2810; X64-NEXT:    retq ## encoding: [0xc3]
2811  %q = load i32, i32* %ptr_b
2812  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2813  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2814  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2815  ret <16 x i32> %res
2816}
2817
2818define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2819; X86-LABEL: test_mask_mullo_epi32_rmbk_512:
2820; X86:       ## %bb.0:
2821; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2822; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2823; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x08]
2824; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2825; X86-NEXT:    retl ## encoding: [0xc3]
2826;
2827; X64-LABEL: test_mask_mullo_epi32_rmbk_512:
2828; X64:       ## %bb.0:
2829; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2830; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2831; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2832; X64-NEXT:    retq ## encoding: [0xc3]
2833  %q = load i32, i32* %ptr_b
2834  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2835  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2836  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2837  ret <16 x i32> %res
2838}
2839
2840define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2841; X86-LABEL: test_mask_mullo_epi32_rmbkz_512:
2842; X86:       ## %bb.0:
2843; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2844; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2845; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x00]
2846; X86-NEXT:    retl ## encoding: [0xc3]
2847;
2848; X64-LABEL: test_mask_mullo_epi32_rmbkz_512:
2849; X64:       ## %bb.0:
2850; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2851; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2852; X64-NEXT:    retq ## encoding: [0xc3]
2853  %q = load i32, i32* %ptr_b
2854  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2855  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2856  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2857  ret <16 x i32> %res
2858}
2859
2860declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2861
2862
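; 128-bit lane shuffles. Each 2-bit immediate field selects a source lane:
; imm8 22 = 0b00'01'01'10 takes lanes 2 and 1 of the first source and lanes
; 1 and 0 of the second, matching the zmm0[4,5,2,3],zmm1[2,3,0,1] comments.
; Note the unmasked 32x4 tests are emitted as the equivalent 64x2
; instructions, since element size is irrelevant to a whole-lane shuffle.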
2863declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
2864
2865define <16 x float> @test_int_x86_avx512_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) {
2866; CHECK-LABEL: test_int_x86_avx512_shuf_f32x4:
2867; CHECK:       ## %bb.0:
2868; CHECK-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc1,0x16]
2869; CHECK-NEXT:    ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2870; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2871  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
2872  ret <16 x float> %res
2873}
2874
2875define <16 x float> @test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
2876; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
2877; X86:       ## %bb.0:
2878; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2879; X86-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
2880; X86-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2881; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2882; X86-NEXT:    retl ## encoding: [0xc3]
2883;
2884; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
2885; X64:       ## %bb.0:
2886; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2887; X64-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
2888; X64-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2889; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
2890; X64-NEXT:    retq ## encoding: [0xc3]
2891  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
2892  ret <16 x float> %res
2893}
2894
2895declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
2896
2897define <8 x double> @test_int_x86_avx512_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3) {
2898; CHECK-LABEL: test_int_x86_avx512_shuf_f64x2:
2899; CHECK:       ## %bb.0:
2900; CHECK-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc1,0x16]
2901; CHECK-NEXT:    ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2902; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2903  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
2904  ret <8 x double> %res
2905}
2906
2907define <8 x double> @test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
2908; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
2909; X86:       ## %bb.0:
2910; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2911; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2912; X86-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
2913; X86-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2914; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
2915; X86-NEXT:    retl ## encoding: [0xc3]
2916;
2917; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
2918; X64:       ## %bb.0:
2919; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2920; X64-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
2921; X64-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2922; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
2923; X64-NEXT:    retq ## encoding: [0xc3]
2924  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
2925  ret <8 x double> %res
2926}
2927
2928define <8 x double> @test_int_x86_avx512_maskz_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, i8 %x4) {
2929; X86-LABEL: test_int_x86_avx512_maskz_shuf_f64x2:
2930; X86:       ## %bb.0:
2931; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2932; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2933; X86-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
2934; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2935; X86-NEXT:    retl ## encoding: [0xc3]
2936;
2937; X64-LABEL: test_int_x86_avx512_maskz_shuf_f64x2:
2938; X64:       ## %bb.0:
2939; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2940; X64-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
2941; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2942; X64-NEXT:    retq ## encoding: [0xc3]
2943  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
2944  ret <8 x double> %res
2945}
2946
2947declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
2948
2949define <16 x i32> @test_int_x86_avx512_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3) {
2950; CHECK-LABEL: test_int_x86_avx512_shuf_i32x4:
2951; CHECK:       ## %bb.0:
2952; CHECK-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc1,0x16]
2953; CHECK-NEXT:    ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2954; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2955  %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
2956  ret <16 x i32> %res
2957}
2958
2959define <16 x i32> @test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
2960; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
2961; X86:       ## %bb.0:
2962; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2963; X86-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
2964; X86-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2965; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2966; X86-NEXT:    retl ## encoding: [0xc3]
2967;
2968; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
2969; X64:       ## %bb.0:
2970; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2971; X64-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
2972; X64-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2973; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2974; X64-NEXT:    retq ## encoding: [0xc3]
2975  %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
2976  ret <16 x i32> %res
2977}
2978
2979declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
2980
2981define <8 x i64> @test_int_x86_avx512_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3) {
2982; CHECK-LABEL: test_int_x86_avx512_shuf_i64x2:
2983; CHECK:       ## %bb.0:
2984; CHECK-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc1,0x16]
2985; CHECK-NEXT:    ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2986; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
2987  %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
2988  ret <8 x i64> %res
2989}
2990
2991define <8 x i64> @test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
2992; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
2993; X86:       ## %bb.0:
2994; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2995; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2996; X86-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
2997; X86-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2998; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2999; X86-NEXT:    retl ## encoding: [0xc3]
3000;
3001; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
3002; X64:       ## %bb.0:
3003; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3004; X64-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
3005; X64-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
3006; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3007; X64-NEXT:    retq ## encoding: [0xc3]
3008  %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
3009  ret <8 x i64> %res
3010}
3011
3012declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
3013
3014define <8 x double>@test_int_x86_avx512_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3) {
3015; CHECK-LABEL: test_int_x86_avx512_shuf_pd_512:
3016; CHECK:       ## %bb.0:
3017; CHECK-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xc1,0x16]
3018; CHECK-NEXT:    ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
3019; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3020  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
3021  ret <8 x double> %res
3022}
3023
3024define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
3025; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
3026; X86:       ## %bb.0:
3027; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3028; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3029; X86-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
3030; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
3031; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
3032; X86-NEXT:    retl ## encoding: [0xc3]
3033;
3034; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
3035; X64:       ## %bb.0:
3036; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3037; X64-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
3038; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
3039; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
3040; X64-NEXT:    retq ## encoding: [0xc3]
3041  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
3042  ret <8 x double> %res
3043}
3044
3045define <8 x double>@test_int_x86_avx512_maskz_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, i8 %x4) {
3046; X86-LABEL: test_int_x86_avx512_maskz_shuf_pd_512:
3047; X86:       ## %bb.0:
3048; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3049; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3050; X86-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
3051; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
3052; X86-NEXT:    retl ## encoding: [0xc3]
3053;
3054; X64-LABEL: test_int_x86_avx512_maskz_shuf_pd_512:
3055; X64:       ## %bb.0:
3056; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3057; X64-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
3058; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
3059; X64-NEXT:    retq ## encoding: [0xc3]
3060  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
3061  ret <8 x double> %res
3062}
3063
3064declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
3065
3066define <16 x float>@test_int_x86_avx512_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) {
3067; CHECK-LABEL: test_int_x86_avx512_shuf_ps_512:
3068; CHECK:       ## %bb.0:
3069; CHECK-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xc1,0x16]
3070; CHECK-NEXT:    ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
3071; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3072  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
3073  ret <16 x float> %res
3074}
3075
3076define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
3077; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
3078; X86:       ## %bb.0:
3079; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3080; X86-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
3081; X86-NEXT:    ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
3082; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
3083; X86-NEXT:    retl ## encoding: [0xc3]
3084;
3085; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
3086; X64:       ## %bb.0:
3087; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3088; X64-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
3089; X64-NEXT:    ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
3090; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
3091; X64-NEXT:    retq ## encoding: [0xc3]
3092  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
3093  ret <16 x float> %res
3094}
3095
3096declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3097
3098define <16 x i32>@test_int_x86_avx512_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
3099; CHECK-LABEL: test_int_x86_avx512_pmaxs_d_512:
3100; CHECK:       ## %bb.0:
3101; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
3102; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3103  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3104  ret <16 x i32> %res
3105}
3106
3107define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3108; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
3109; X86:       ## %bb.0:
3110; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3111; X86-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
3112; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3113; X86-NEXT:    retl ## encoding: [0xc3]
3114;
3115; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
3116; X64:       ## %bb.0:
3117; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3118; X64-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
3119; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3120; X64-NEXT:    retq ## encoding: [0xc3]
3121  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3122  ret <16 x i32> %res
3123}
3124
3125declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3126
3127define <8 x i64>@test_int_x86_avx512_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
3128; CHECK-LABEL: test_int_x86_avx512_pmaxs_q_512:
3129; CHECK:       ## %bb.0:
3130; CHECK-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
3131; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3132  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3133  ret <8 x i64> %res
3134}
3135
3136define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3137; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
3138; X86:       ## %bb.0:
3139; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3140; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3141; X86-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
3142; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3143; X86-NEXT:    retl ## encoding: [0xc3]
3144;
3145; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
3146; X64:       ## %bb.0:
3147; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3148; X64-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
3149; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3150; X64-NEXT:    retq ## encoding: [0xc3]
3151  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3152  ret <8 x i64> %res
3153}
3154
3155declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3156
3157define <16 x i32>@test_int_x86_avx512_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
3158; CHECK-LABEL: test_int_x86_avx512_pmaxu_d_512:
3159; CHECK:       ## %bb.0:
3160; CHECK-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xc1]
3161; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3162  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3163  ret <16 x i32> %res
3164}
3165
3166define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3167; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
3168; X86:       ## %bb.0:
3169; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3170; X86-NEXT:    vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
3171; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3172; X86-NEXT:    retl ## encoding: [0xc3]
3173;
3174; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
3175; X64:       ## %bb.0:
3176; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3177; X64-NEXT:    vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
3178; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3179; X64-NEXT:    retq ## encoding: [0xc3]
3180  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3181  ret <16 x i32> %res
3182}
3183
3184declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3185
3186define <8 x i64>@test_int_x86_avx512_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
3187; CHECK-LABEL: test_int_x86_avx512_pmaxu_q_512:
3188; CHECK:       ## %bb.0:
3189; CHECK-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xc1]
3190; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3191  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3192  ret <8 x i64> %res
3193}
3194
3195define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3196; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
3197; X86:       ## %bb.0:
3198; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3199; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3200; X86-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
3201; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3202; X86-NEXT:    retl ## encoding: [0xc3]
3203;
3204; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
3205; X64:       ## %bb.0:
3206; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3207; X64-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
3208; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3209; X64-NEXT:    retq ## encoding: [0xc3]
3210  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3211  ret <8 x i64> %res
3212}
3213
3214declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3215
3216define <16 x i32>@test_int_x86_avx512_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
3217; CHECK-LABEL: test_int_x86_avx512_pmins_d_512:
3218; CHECK:       ## %bb.0:
3219; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xc1]
3220; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3221  %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3222  ret <16 x i32> %res
3223}
3224
3225define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3226; X86-LABEL: test_int_x86_avx512_mask_pmins_d_512:
3227; X86:       ## %bb.0:
3228; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3229; X86-NEXT:    vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
3230; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3231; X86-NEXT:    retl ## encoding: [0xc3]
3232;
3233; X64-LABEL: test_int_x86_avx512_mask_pmins_d_512:
3234; X64:       ## %bb.0:
3235; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3236; X64-NEXT:    vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
3237; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3238; X64-NEXT:    retq ## encoding: [0xc3]
3239  %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3240  ret <16 x i32> %res
3241}
3242
3243declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3244
3245define <8 x i64>@test_int_x86_avx512_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
3246; CHECK-LABEL: test_int_x86_avx512_pmins_q_512:
3247; CHECK:       ## %bb.0:
3248; CHECK-NEXT:    vpminsq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xc1]
3249; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3250  %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3251  ret <8 x i64> %res
3252}
3253
3254define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3255; X86-LABEL: test_int_x86_avx512_mask_pmins_q_512:
3256; X86:       ## %bb.0:
3257; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3258; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3259; X86-NEXT:    vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
3260; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3261; X86-NEXT:    retl ## encoding: [0xc3]
3262;
3263; X64-LABEL: test_int_x86_avx512_mask_pmins_q_512:
3264; X64:       ## %bb.0:
3265; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3266; X64-NEXT:    vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
3267; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3268; X64-NEXT:    retq ## encoding: [0xc3]
3269  %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3270  ret <8 x i64> %res
3271}
3272
3273declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3274
3275define <16 x i32>@test_int_x86_avx512_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
3276; CHECK-LABEL: test_int_x86_avx512_pminu_d_512:
3277; CHECK:       ## %bb.0:
3278; CHECK-NEXT:    vpminud %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
3279; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3280  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3281  ret <16 x i32> %res
3282}
3283
3284define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3285; X86-LABEL: test_int_x86_avx512_mask_pminu_d_512:
3286; X86:       ## %bb.0:
3287; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3288; X86-NEXT:    vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
3289; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3290; X86-NEXT:    retl ## encoding: [0xc3]
3291;
3292; X64-LABEL: test_int_x86_avx512_mask_pminu_d_512:
3293; X64:       ## %bb.0:
3294; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3295; X64-NEXT:    vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
3296; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3297; X64-NEXT:    retq ## encoding: [0xc3]
3298  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3299  ret <16 x i32> %res
3300}
3301
3302declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3303
3304define <8 x i64>@test_int_x86_avx512_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
3305; CHECK-LABEL: test_int_x86_avx512_pminu_q_512:
3306; CHECK:       ## %bb.0:
3307; CHECK-NEXT:    vpminuq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xc1]
3308; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
3309  %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3310  ret <8 x i64> %res
3311}
3312
3313define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3314; X86-LABEL: test_int_x86_avx512_mask_pminu_q_512:
3315; X86:       ## %bb.0:
3316; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3317; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3318; X86-NEXT:    vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
3319; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3320; X86-NEXT:    retl ## encoding: [0xc3]
3321;
3322; X64-LABEL: test_int_x86_avx512_mask_pminu_q_512:
3323; X64:       ## %bb.0:
3324; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3325; X64-NEXT:    vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
3326; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3327; X64-NEXT:    retq ## encoding: [0xc3]
3328  %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3329  ret <8 x i64> %res
3330}
3331
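; The mask.move.ss/sd intrinsics implement a masked scalar move: element 0 of
; the result comes from %__B when the low mask bit is set, otherwise from the
; passthru (%__W, or zero in the maskz forms); the upper elements come from %__A.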
define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_move_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
  ret <4 x float> %res
}


define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_move_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
  ret <4 x float> %res
}

define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_move_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
  ret <2 x double> %res
}

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_move_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
  ret <2 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)

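; The pmovzx* intrinsics zero-extend the low elements of the source vector; the
; autogenerated lane comments spell this out as each source element followed by
; the zero lanes that pad it to the wider element type.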
declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxbd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xc0]
; CHECK-NEXT:    ## zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_512(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxbq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xc0]
; CHECK-NEXT:    ## zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_512(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxdq %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xc0]
; CHECK-NEXT:    ## zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_512(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xc0]
; CHECK-NEXT:    ## zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_512(<16 x i16> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxwq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xc0]
; CHECK-NEXT:    ## zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_512(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

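; The pmovsx* tests mirror the pmovzx* tests above. Sign extension has no
; shuffle-mask decomposition, so no lane comments are generated for these.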
declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_512(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_512(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxdq %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_512(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_512(<16 x i16> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_512(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
  ret <8 x i64> %res
}

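; Unlike the legacy mask intrinsics above, the variable-rotate tests call the
; unmasked llvm.x86.avx512.prolv/prorv intrinsics and express the masking in IR
; as a select over the bitcast mask, which still folds into the {%k1} and
; {%k1} {z} instruction forms.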
declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  ret <16 x i32> %1
}

define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32>@test_int_x86_avx512_maskz_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

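; Variable-count rotate-right tests (vprorvd/vprorvq), mirroring the
; rotate-left cases above.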
declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  ret <16 x i32> %1
}

define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32>@test_int_x86_avx512_maskz_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
  ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}

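; Rotate-left-by-immediate tests (vprold/vprolq). Each test folds a
; merge-masked, a zero-masked, and an unmasked rotate by different immediates
; into one function and sums the results with vpadd.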
declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X86-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X64-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  %4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 5)
  %res3 = add <16 x i32> %3, %6
  %res4 = add <16 x i32> %res3, %7
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X86-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X64-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
  %4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 5)
  %res3 = add <8 x i64> %3, %6
  %res4 = add <8 x i64> %res3, %7
  ret <8 x i64> %res4
}

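; Rotate-right-by-immediate tests (vprord/vprorq), same structure as the
; rotate-left-by-immediate tests above.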
declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  %4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 5)
  %res3 = add <16 x i32> %3, %6
  %res4 = add <16 x i32> %res3, %7
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
  %4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 5)
  %res3 = add <8 x i64> %3, %6
  %res4 = add <8 x i64> %res3, %7
  ret <8 x i64> %res4
}

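; Logical right-shift-by-immediate tests via the legacy masked intrinsics
; (mask.psrl.qi/di); an all-ones mask selects the unmasked form.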
declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X86-NEXT:    vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
; X86-NEXT:    vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X64-NEXT:    vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
; X64-NEXT:    vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 6, <8 x i64> zeroinitializer, i8 %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X86-NEXT:    vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
; X86-NEXT:    vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X64-NEXT:    vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
; X64-NEXT:    vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 6, <16 x i32> zeroinitializer, i16 %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

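; Arithmetic right-shift-by-immediate tests (vpsrad/vpsraq) via mask.psra.di/qi.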
declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X86-NEXT:    vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
; X86-NEXT:    vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X64-NEXT:    vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
; X64-NEXT:    vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X86-NEXT:    vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
; X86-NEXT:    vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X64-NEXT:    vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
; X64-NEXT:    vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

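; Left-shift-by-immediate tests (vpslld/vpsllq) via mask.psll.di/qi.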
declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X86-NEXT:    vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
; X86-NEXT:    vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X64-NEXT:    vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
; X64-NEXT:    vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X86-NEXT:    vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
; X86-NEXT:    vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X64-NEXT:    vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
; X64-NEXT:    vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

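; Shift tests where the count comes from the low element of an XMM register
; (vpslld/vpsllq, vpsrld/vpsrlq, vpsrad/vpsraq), each in unmasked (all-ones
; mask), merge-masked, and zero-masked variants.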
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpslld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

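; Per-element variable shift tests (vpsllvd/q, vpsravd/q, vpsrlvd/q) via the
; legacy masked intrinsics, same three masking forms as above.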
4692define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
4693; CHECK-LABEL: test_x86_avx512_psllv_d:
4694; CHECK:       ## %bb.0:
4695; CHECK-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x47,0xc1]
4696; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
4697  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
4698  ret <16 x i32> %res
4699}
4700
4701define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
4702; X86-LABEL: test_x86_avx512_mask_psllv_d:
4703; X86:       ## %bb.0:
4704; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4705; X86-NEXT:    vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
4706; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4707; X86-NEXT:    retl ## encoding: [0xc3]
4708;
4709; X64-LABEL: test_x86_avx512_mask_psllv_d:
4710; X64:       ## %bb.0:
4711; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4712; X64-NEXT:    vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
4713; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4714; X64-NEXT:    retq ## encoding: [0xc3]
4715  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
4716  ret <16 x i32> %res
4717}
4718
4719define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
4720; X86-LABEL: test_x86_avx512_maskz_psllv_d:
4721; X86:       ## %bb.0:
4722; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4723; X86-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
4724; X86-NEXT:    retl ## encoding: [0xc3]
4725;
4726; X64-LABEL: test_x86_avx512_maskz_psllv_d:
4727; X64:       ## %bb.0:
4728; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4729; X64-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
4730; X64-NEXT:    retq ## encoding: [0xc3]
4731  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
4732  ret <16 x i32> %res
4733}
4734
4735declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
4736
4737define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
4738; CHECK-LABEL: test_x86_avx512_psllv_q:
4739; CHECK:       ## %bb.0:
4740; CHECK-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x47,0xc1]
4741; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
4742  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
4743  ret <8 x i64> %res
4744}
4745
4746define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
4747; X86-LABEL: test_x86_avx512_mask_psllv_q:
4748; X86:       ## %bb.0:
4749; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4750; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4751; X86-NEXT:    vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
4752; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4753; X86-NEXT:    retl ## encoding: [0xc3]
4754;
4755; X64-LABEL: test_x86_avx512_mask_psllv_q:
4756; X64:       ## %bb.0:
4757; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4758; X64-NEXT:    vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
4759; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4760; X64-NEXT:    retq ## encoding: [0xc3]
4761  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
4762  ret <8 x i64> %res
4763}
4764
4765define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
4766; X86-LABEL: test_x86_avx512_maskz_psllv_q:
4767; X86:       ## %bb.0:
4768; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4769; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4770; X86-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
4771; X86-NEXT:    retl ## encoding: [0xc3]
4772;
4773; X64-LABEL: test_x86_avx512_maskz_psllv_q:
4774; X64:       ## %bb.0:
4775; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4776; X64-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
4777; X64-NEXT:    retq ## encoding: [0xc3]
4778  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
4779  ret <8 x i64> %res
4780}
4781
4782declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
4783
4784
4785define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
4786; CHECK-LABEL: test_x86_avx512_psrav_d:
4787; CHECK:       ## %bb.0:
4788; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x46,0xc1]
4789; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
4790  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
4791  ret <16 x i32> %res
4792}
4793
4794define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
4795; X86-LABEL: test_x86_avx512_mask_psrav_d:
4796; X86:       ## %bb.0:
4797; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4798; X86-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
4799; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4800; X86-NEXT:    retl ## encoding: [0xc3]
4801;
4802; X64-LABEL: test_x86_avx512_mask_psrav_d:
4803; X64:       ## %bb.0:
4804; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4805; X64-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
4806; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4807; X64-NEXT:    retq ## encoding: [0xc3]
4808  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
4809  ret <16 x i32> %res
4810}
4811
4812define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
4813; X86-LABEL: test_x86_avx512_maskz_psrav_d:
4814; X86:       ## %bb.0:
4815; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4816; X86-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
4817; X86-NEXT:    retl ## encoding: [0xc3]
4818;
4819; X64-LABEL: test_x86_avx512_maskz_psrav_d:
4820; X64:       ## %bb.0:
4821; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4822; X64-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
4823; X64-NEXT:    retq ## encoding: [0xc3]
4824  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
4825  ret <16 x i32> %res
4826}
4827
4828declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
4829
4830define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
4831; CHECK-LABEL: test_x86_avx512_psrav_q:
4832; CHECK:       ## %bb.0:
4833; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x46,0xc1]
4834; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
4835  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
4836  ret <8 x i64> %res
4837}
4838
4839define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
4840; X86-LABEL: test_x86_avx512_mask_psrav_q:
4841; X86:       ## %bb.0:
4842; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4843; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4844; X86-NEXT:    vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
4845; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4846; X86-NEXT:    retl ## encoding: [0xc3]
4847;
4848; X64-LABEL: test_x86_avx512_mask_psrav_q:
4849; X64:       ## %bb.0:
4850; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4851; X64-NEXT:    vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
4852; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4853; X64-NEXT:    retq ## encoding: [0xc3]
4854  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
4855  ret <8 x i64> %res
4856}
4857
4858define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
4859; X86-LABEL: test_x86_avx512_maskz_psrav_q:
4860; X86:       ## %bb.0:
4861; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4862; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4863; X86-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
4864; X86-NEXT:    retl ## encoding: [0xc3]
4865;
4866; X64-LABEL: test_x86_avx512_maskz_psrav_q:
4867; X64:       ## %bb.0:
4868; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4869; X64-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
4870; X64-NEXT:    retq ## encoding: [0xc3]
4871  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
4872  ret <8 x i64> %res
4873}
4874
4875declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
4876
define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

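; The same coverage for qwords (vpsrlvq), plus a folded-load form below.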
define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; X86-LABEL: test_x86_avx512_psrlv_q_memop:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsrlvq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_q_memop:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

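; Signed dword-to-double conversion (vcvtdq2pd), unmasked and merge-masked.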
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
  ret <8 x double> %res
}

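; Unsigned dword-to-double conversion (vcvtudq2pd), unmasked and merge-masked.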
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
  ret <8 x double> %res
}

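; Half-to-single conversion (vcvtph2ps). The trailing i32 argument selects the
; exception behavior: 4 is the current-direction default, 8 requests {sae}.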
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtph2ps {sae}, %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x18,0x13,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly

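; valignq/valignd concatenate the two sources and shift right by the immediate
; element count; the autogenerated shuffle comments spell out the lane mapping.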
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    valignq $2, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x03,0xc1,0x02]
; CHECK-NEXT:    ## zmm0 = zmm1[2,3,4,5,6,7],zmm0[0,1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; X86-LABEL: test_mask_valign_q:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X86-NEXT:    ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_valign_q:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X64-NEXT:    ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_maskz_valign_d:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_valign_d:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

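; vpermilpd with a variable (vector) control operand, in plain, merge-masked,
; and zero-masked forms.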
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

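; vpermilps with a variable control operand; the constant-pool test further
; down also checks the printed shuffle decode comments.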
declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
  ret <16 x float> %res
}

; Test case to make sure we can print shuffle decode comments for constant pool loads.
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X86-NEXT:    ## fixup A - offset: 6, value: LCPI299_0, kind: FK_Data_4
; X86-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X86-NEXT:    ## fixup A - offset: 6, value: LCPI299_1, kind: FK_Data_4
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X86-NEXT:    ## fixup A - offset: 6, value: LCPI299_2, kind: FK_Data_4
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X64-NEXT:    ## fixup A - offset: 6, value: LCPI299_0-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X64-NEXT:    ## fixup A - offset: 6, value: LCPI299_1-4, kind: reloc_riprel_4byte
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X64-NEXT:    ## fixup A - offset: 6, value: LCPI299_2-4, kind: reloc_riprel_4byte
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

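; vpmuldq: signed 32x32->64 multiply of the even dword lanes. Suffix key used
; by these tests: rr = reg/reg, rm = reg/mem, rmb = memory broadcast ({1to8});
; a trailing k merges into the passthru and kz zeroes the masked-off lanes.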
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

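; The same matrix of cases for vpmuludq, the unsigned 32x32->64 multiply.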
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

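; Masked 128-bit and 256-bit subvector extracts:
; vextractf32x4/vextracti64x4/vextracti32x4/vextractf64x4.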
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; X86-LABEL: test_mask_vextractf32x4:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextractf32x4:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; X86-LABEL: test_mask_vextracti64x4:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextracti64x4:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; X86-LABEL: test_maskz_vextracti32x4:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_vextracti32x4:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)

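; 128-bit and 256-bit subvector inserts, each in plain, merge-masked, and
; zero-masked forms. Note that the unmasked integer variants codegen to the
; FP-domain vinsertf* opcodes.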
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf32x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  ret <16 x float> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_inserti32x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_inserti64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  ret <8 x i64> %res
}

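; Non-temporal aligned vector load (vmovntdqa).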
5894define <8 x i64> @test_x86_avx512_movntdqa(i8* %a0) {
5895; X86-LABEL: test_x86_avx512_movntdqa:
5896; X86:       ## %bb.0:
5897; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5898; X86-NEXT:    vmovntdqa (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x00]
5899; X86-NEXT:    retl ## encoding: [0xc3]
5900;
5901; X64-LABEL: test_x86_avx512_movntdqa:
5902; X64:       ## %bb.0:
5903; X64-NEXT:    vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
5904; X64-NEXT:    retq ## encoding: [0xc3]
5905  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %a0)
5906  ret <8 x i64> %res
5907}
5908
5909declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) nounwind readonly
5910
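; The cmp tests below sweep the immediate through all eight predicates
; (0=eq, 1=lt, 2=le, 3=false, 4=ne, 5=nlt, 6=nle, 7=true). Predicates 3 and
; 7 fold to constant masks, so only six compares are emitted: lane 3 stays
; zero (vmovd already zeroed the upper lanes) and lane 7 is blended in as
; all-ones, or, in the masked variants, inserted directly from the mask GPR.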
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT:    ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT:    vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X86-NEXT:    vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT:    vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X64-NEXT:    vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone

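; The ucmp tests mirror the signed ones with the unsigned predicate forms
; (vpcmpltud/vpcmpleud/vpcmpnltud/vpcmpnleud); eq and ne share the signed
; encodings.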
define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT:    ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_ucmp_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT:    vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X86-NEXT:    vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT:    vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X64-NEXT:    vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone

define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0]
; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x05]
; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe1]
; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
; X86-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X86-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
; X86-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
; X64-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X64-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; X64-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01]
; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x02]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd9,0x05]
; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x06]
; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
; X86-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X86-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
; X86-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
; X86-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
; X64-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X64-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
; X64-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; X64-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)

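; For the register-source broadcast*x4 tests the 128-bit splat is lowered
; as a vinsert sequence, with masking applied via a masked move or a masked
; insert; only the *_load variants select the dedicated
; vbroadcast{f,i}{32x4,64x4} memory forms.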
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X86-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X64-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcastf64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_broadcastf64x4_512(<4 x double> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcastf64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x double>, <4 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X86-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X64-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_broadcasti64x4_512(<4 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcasti64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x i64>, <4 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsd %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_pabs_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsq %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}

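; In the ptestm/ptestnm tests the masked intrinsic call is not emitted as a
; masked vptest; instead the unmasked result is moved to a GPR and ANDed with
; the mask there, which is why each test issues a single vptest instruction.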
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
; X86-LABEL: test_vptestmq:
; X86:       ## %bb.0:
; X86-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmq:
; X64:       ## %bb.0:
; X64-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
  %res2 = add i8 %res1, %res
  ret i8 %res2
}
declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
; X86-LABEL: test_vptestmd:
; X86:       ## %bb.0:
; X86-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmd:
; X64:       ## %bb.0:
; X64-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
  %res2 = add i16 %res1, %res
  ret i16 %res2
}
declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16)

define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8)

define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; X86-LABEL: test_kand:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
; X86-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kand:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
; X64-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
define i16 @test_kandn(i16 %a0, i16 %a1) {
; X86-LABEL: test_kandn:
; X86:       ## %bb.0:
; X86-NEXT:    movl $65527, %eax ## encoding: [0xb8,0xf7,0xff,0x00,0x00]
; X86-NEXT:    ## imm = 0xFFF7
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax ## encoding: [0x0b,0x44,0x24,0x04]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kandn:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    orl $-9, %eax ## encoding: [0x83,0xc8,0xf7]
; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; X86-LABEL: test_knot:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_knot:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
define i16 @test_kor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x0b,0x44,0x24,0x08]
; X86-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    orl %esi, %eax ## encoding: [0x09,0xf0]
; X64-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here are a no-op and should be eliminated,
; probably by FoldConstantArithmetic in SelectionDAG.
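; Informally: kxnor(kxnor(a0, 8), a1) = ~(~(a0 ^ 8) ^ a1) = (a0 ^ 8) ^ a1,
; since the two complements cancel. That is why the scalar lowering below is
; a plain pair of XORs with no NOT at all.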
define i16 @test_kxnor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kxnor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kxnor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
define i16 @test_kxor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kxor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kxor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
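; kortestw ORs the two mask registers and sets ZF when the result is all
; zeros and CF when it is all ones; kortestz.w therefore reads its result
; back with sete, and kortestc.w with setb.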
define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
; CHECK-LABEL: test_kortestz:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
; CHECK-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = bitcast <8 x i64> %A to <16 x i32>
  %1 = bitcast <8 x i64> %B to <16 x i32>
  %2 = icmp ne <16 x i32> %0, %1
  %3 = bitcast <8 x i64> %C to <16 x i32>
  %4 = bitcast <8 x i64> %D to <16 x i32>
  %5 = icmp ne <16 x i32> %3, %4
  %6 = bitcast <16 x i1> %2 to i16
  %7 = bitcast <16 x i1> %5 to i16
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
; CHECK-LABEL: test_kortestc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
; CHECK-NEXT:    setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = bitcast <8 x i64> %A to <16 x i32>
  %1 = bitcast <8 x i64> %B to <16 x i32>
  %2 = icmp ne <16 x i32> %0, %1
  %3 = bitcast <8 x i64> %C to <16 x i32>
  %4 = bitcast <8 x i64> %D to <16 x i32>
  %5 = icmp ne <16 x i32> %3, %4
  %6 = bitcast <16 x i1> %2 to i16
  %7 = bitcast <16 x i1> %5 to i16
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %6, i16 %7)
  ret i32 %res
}

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)

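; vpmuldq multiplies the sign-extended low (even) 32-bit element of each
; 64-bit lane, producing eight 64-bit products; vpmuludq further below is the
; zero-extending counterpart. The rr/rrk/rrkz/rm/rmk/rmkz/rmb* suffixes walk
; the same operation through register, merge-masked, zero-masked, memory, and
; broadcast-from-memory forms.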
define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mul_epi32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mul_epi32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

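; The rmb tests splat one loaded i64 with an insertelement + shufflevector
; idiom; the backend folds that splat into the EVEX embedded-broadcast memory
; operand, which the checks show as (%eax){1to8} / (%rdi){1to8}.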
define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mul_epi32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>)

define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mul_epu32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mul_epu32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mul_epu32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>)

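; vcvtusi2sd converts an unsigned 32-bit integer into the low double of the
; destination and copies the upper element from the first source; on X86 the
; value arrives on the stack, hence the vcvtusi2sdl memory form.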
define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) {
; X86-LABEL: test_x86_avx512_mm_cvtu32_sd:
; X86:       ## %bb.0:
; X86-NEXT:    vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0x44,0x24,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mm_cvtu32_sd:
; X64:       ## %bb.0:
; X64-NEXT:    vcvtusi2sd %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0xc7]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone

define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
; X86-LABEL: test_x86_vbroadcast_ss_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastss (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
; X86-LABEL: test_x86_vbroadcast_sd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastsd (%eax), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)

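; permvar selects elements of %x0 using the indices in %x1, i.e.
; res[i] = x0[x1[i]]. In AT&T syntax the data operand is printed first, so
; the checks read vpermpd %zmm0, %zmm1, ... with %zmm1 holding the indices.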
define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_df_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_di_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_sf_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_sf_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_sf_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
  ret <16 x float> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_si_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_si_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_si_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_si_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_si_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

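; vpternlog{d,q} takes an 8-bit immediate that is a three-input truth table:
; bit i of the immediate is the output for the input combination whose bits
; (src1, src2, src3) spell out i. The masked form merges into the first
; operand under %k1; the maskz form zeroes the unselected elements instead.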
define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

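; vpermi2 and vpermt2 both gather elements from two source tables through an
; index vector; vpermi2 clobbers the index register while vpermt2 clobbers
; one of the tables, so the backend may print either form depending on which
; operand it is free to overwrite. That is why a vpermi2var test can end up
; checking for vpermt2d.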
define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpermi2d (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermi2d (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpermi2d (%eax), %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x76,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x76,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
7854; X86:       ## %bb.0:
7855; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
7856; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7857; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
7858; X86-NEXT:    vpermi2pd (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xd9,0x77,0x00]
7859; X86-NEXT:    retl ## encoding: [0xc3]
7860;
7861; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
7862; X64:       ## %bb.0:
7863; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7864; X64-NEXT:    vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xd9,0x77,0x07]
7865; X64-NEXT:    retq ## encoding: [0xc3]
7866  %x2s = load double, double* %x2ptr
7867  %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
7868  %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
7869  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
7870  ret <8 x double> %res
7871}
7872
7873declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
7874
7875define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
7876; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
7877; X86:       ## %bb.0:
7878; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7879; X86-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x77,0xc2]
7880; X86-NEXT:    retl ## encoding: [0xc3]
7881;
7882; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
7883; X64:       ## %bb.0:
7884; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7885; X64-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x77,0xc2]
7886; X64-NEXT:    retq ## encoding: [0xc3]
7887  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
7888  ret <16 x float> %res
7889}
7890
7891
7892declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
7893
7894define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
7895; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
7896; X86:       ## %bb.0:
7897; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
7898; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
7899; X86-NEXT:    vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x76,0xc2]
7900; X86-NEXT:    retl ## encoding: [0xc3]
7901;
7902; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
7903; X64:       ## %bb.0:
7904; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7905; X64-NEXT:    vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x76,0xc2]
7906; X64-NEXT:    retq ## encoding: [0xc3]
7907  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
7908  ret <8 x i64> %res
7909}
7910
7911declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
7912
7913define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
7914; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_512:
7915; CHECK:       ## %bb.0:
7916; CHECK-NEXT:    vpermi2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x76,0xc2]
7917; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
7918  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
7919  ret <16 x i32> %res
7920}
7921
7922define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
7923; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
7924; X86:       ## %bb.0:
7925; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7926; X86-NEXT:    vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
7927; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
7928; X86-NEXT:    retl ## encoding: [0xc3]
7929;
7930; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
7931; X64:       ## %bb.0:
7932; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
7933; X64-NEXT:    vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
7934; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
7935; X64-NEXT:    retq ## encoding: [0xc3]
7936  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
7937  ret <16 x i32> %res
7938}
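
; Note: vpermi2* and vpermt2* read the same three inputs (an index vector
; plus a two-register table) but overwrite different operands: vpermi2*
; clobbers the index register while vpermt2* clobbers the first table
; register. With no mask the two forms are interchangeable, so the unmasked
; tests above lower to whichever variant leaves the result in %zmm0 and
; avoids the extra vmovdqa64/vmovaps seen in the masked cases.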

declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
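
; The trailing i32 argument of these intrinsics selects the rounding mode.
; The values used below follow the _MM_FROUND_* convention (direction | 8 for
; suppress-all-exceptions), matching the {rn/rd/ru/rz-sae} suffixes in the
; checks:
;   8  = {rn-sae}  round to nearest even
;   9  = {rd-sae}  round toward -infinity
;   10 = {ru-sae}  round toward +infinity
;   11 = {rz-sae}  round toward zero
;   4  = current MXCSR rounding mode (no SAE, plain EVEX form)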

define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}

;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rn:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rn:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rd:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_ru:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_ru:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rz:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 11)
  ret <16 x float> %res
}
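
; Note: with a zeroinitializer passthru the mask folds into the zeroing form
; {%k1} {z}, so the result can stay in %zmm0. The passthru tests below must
; merge into the %passthru register instead, which is why they compute into
; %zmm2 under {%k1} and copy the result back with vmovaps.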

;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rn:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rn:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rd:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_ru:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_ru:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rz:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 11)
  ret <16 x float> %res
}

;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rn:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rn:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 8)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rd:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 9)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_ru:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_ru:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 10)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 11)
  ret <8 x double> %res
}
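
; Note: the <8 x double> variants take an i8 mask. On the 32-bit ABI it is
; passed on the stack and zero-extended with movzbl before kmovw, since kmovw
; reads a full 16-bit GPR; on x86-64 the mask already arrives in %edi.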

define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_current:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_current:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_current:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_current:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
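
; Note: the unmasked variants above pass i16 -1 (all lanes enabled), so the
; i16 %mask argument is intentionally dead and no kmovw is emitted.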
8454declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
8455
8456define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8457; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
8458; X86:       ## %bb.0:
8459; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8460; X86-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
8461; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8462; X86-NEXT:    retl ## encoding: [0xc3]
8463;
8464; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
8465; X64:       ## %bb.0:
8466; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8467; X64-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
8468; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8469; X64-NEXT:    retq ## encoding: [0xc3]
8470  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
8471  ret <16 x float> %res
8472}
8473define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8474; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
8475; X86:       ## %bb.0:
8476; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8477; X86-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
8478; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8479; X86-NEXT:    retl ## encoding: [0xc3]
8480;
8481; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
8482; X64:       ## %bb.0:
8483; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8484; X64-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
8485; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8486; X64-NEXT:    retq ## encoding: [0xc3]
8487  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
8488  ret <16 x float> %res
8489}
8490define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8491; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
8492; X86:       ## %bb.0:
8493; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8494; X86-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
8495; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8496; X86-NEXT:    retl ## encoding: [0xc3]
8497;
8498; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
8499; X64:       ## %bb.0:
8500; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8501; X64-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
8502; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8503; X64-NEXT:    retq ## encoding: [0xc3]
8504  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
8505  ret <16 x float> %res
8506}
8507
8508define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8509; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
8510; X86:       ## %bb.0:
8511; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8512; X86-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
8513; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8514; X86-NEXT:    retl ## encoding: [0xc3]
8515;
8516; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
8517; X64:       ## %bb.0:
8518; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8519; X64-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
8520; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8521; X64-NEXT:    retq ## encoding: [0xc3]
8522  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
8523  ret <16 x float> %res
8524}
8525
8526
8527define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8528; X86-LABEL: test_mm512_mask_sub_round_ps_current:
8529; X86:       ## %bb.0:
8530; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8531; X86-NEXT:    vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
8532; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8533; X86-NEXT:    retl ## encoding: [0xc3]
8534;
8535; X64-LABEL: test_mm512_mask_sub_round_ps_current:
8536; X64:       ## %bb.0:
8537; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8538; X64-NEXT:    vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
8539; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8540; X64-NEXT:    retq ## encoding: [0xc3]
8541  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
8542  ret <16 x float> %res
8543}
8544
8545define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8546; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
8547; CHECK:       ## %bb.0:
8548; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
8549; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8550  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
8551  ret <16 x float> %res
8552}
8553define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8554; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
8555; CHECK:       ## %bb.0:
8556; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
8557; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8558  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
8559  ret <16 x float> %res
8560}
8561define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8562; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
8563; CHECK:       ## %bb.0:
8564; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
8565; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8566  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
8567  ret <16 x float> %res
8568}
8569
8570define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8571; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
8572; CHECK:       ## %bb.0:
8573; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
8574; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8575  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
8576  ret <16 x float> %res
8577}
8578
8579define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8580; CHECK-LABEL: test_mm512_sub_round_ps_current:
8581; CHECK:       ## %bb.0:
8582; CHECK-NEXT:    vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
8583; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8584  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
8585  ret <16 x float> %res
8586}
8587
8588define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8589; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
8590; X86:       ## %bb.0:
8591; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8592; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
8593; X86-NEXT:    retl ## encoding: [0xc3]
8594;
8595; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
8596; X64:       ## %bb.0:
8597; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8598; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
8599; X64-NEXT:    retq ## encoding: [0xc3]
8600  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
8601  ret <16 x float> %res
8602}
8603define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8604; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
8605; X86:       ## %bb.0:
8606; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8607; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
8608; X86-NEXT:    retl ## encoding: [0xc3]
8609;
8610; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
8611; X64:       ## %bb.0:
8612; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8613; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
8614; X64-NEXT:    retq ## encoding: [0xc3]
8615  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
8616  ret <16 x float> %res
8617}
8618define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8619; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
8620; X86:       ## %bb.0:
8621; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8622; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
8623; X86-NEXT:    retl ## encoding: [0xc3]
8624;
8625; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
8626; X64:       ## %bb.0:
8627; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8628; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
8629; X64-NEXT:    retq ## encoding: [0xc3]
8630  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
8631  ret <16 x float> %res
8632}
8633
8634define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8635; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
8636; X86:       ## %bb.0:
8637; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8638; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
8639; X86-NEXT:    retl ## encoding: [0xc3]
8640;
8641; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
8642; X64:       ## %bb.0:
8643; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8644; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
8645; X64-NEXT:    retq ## encoding: [0xc3]
8646  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
8647  ret <16 x float> %res
8648}
8649
8650
8651define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8652; X86-LABEL: test_mm512_maskz_div_round_ps_current:
8653; X86:       ## %bb.0:
8654; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8655; X86-NEXT:    vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
8656; X86-NEXT:    retl ## encoding: [0xc3]
8657;
8658; X64-LABEL: test_mm512_maskz_div_round_ps_current:
8659; X64:       ## %bb.0:
8660; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8661; X64-NEXT:    vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
8662; X64-NEXT:    retq ## encoding: [0xc3]
8663  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
8664  ret <16 x float> %res
8665}
8666
8667define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8668; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
8669; X86:       ## %bb.0:
8670; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8671; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
8672; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8673; X86-NEXT:    retl ## encoding: [0xc3]
8674;
8675; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
8676; X64:       ## %bb.0:
8677; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8678; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
8679; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8680; X64-NEXT:    retq ## encoding: [0xc3]
8681  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
8682  ret <16 x float> %res
8683}
8684define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8685; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
8686; X86:       ## %bb.0:
8687; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8688; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
8689; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8690; X86-NEXT:    retl ## encoding: [0xc3]
8691;
8692; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
8693; X64:       ## %bb.0:
8694; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8695; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
8696; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8697; X64-NEXT:    retq ## encoding: [0xc3]
8698  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
8699  ret <16 x float> %res
8700}
8701define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8702; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
8703; X86:       ## %bb.0:
8704; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8705; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
8706; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8707; X86-NEXT:    retl ## encoding: [0xc3]
8708;
8709; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
8710; X64:       ## %bb.0:
8711; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8712; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
8713; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8714; X64-NEXT:    retq ## encoding: [0xc3]
8715  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
8716  ret <16 x float> %res
8717}
8718
8719define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8720; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
8721; X86:       ## %bb.0:
8722; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8723; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
8724; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8725; X86-NEXT:    retl ## encoding: [0xc3]
8726;
8727; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
8728; X64:       ## %bb.0:
8729; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8730; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
8731; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8732; X64-NEXT:    retq ## encoding: [0xc3]
8733  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
8734  ret <16 x float> %res
8735}
8736
8737
8738define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
8739; X86-LABEL: test_mm512_mask_div_round_ps_current:
8740; X86:       ## %bb.0:
8741; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8742; X86-NEXT:    vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
8743; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8744; X86-NEXT:    retl ## encoding: [0xc3]
8745;
8746; X64-LABEL: test_mm512_mask_div_round_ps_current:
8747; X64:       ## %bb.0:
8748; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
8749; X64-NEXT:    vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
8750; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
8751; X64-NEXT:    retq ## encoding: [0xc3]
8752  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
8753  ret <16 x float> %res
8754}
8755
8756
8757define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8758; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
8759; CHECK:       ## %bb.0:
8760; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
8761; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8762  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
8763  ret <16 x float> %res
8764}
8765define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8766; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
8767; CHECK:       ## %bb.0:
8768; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
8769; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8770  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 9)
8771  ret <16 x float> %res
8772}
8773define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8774; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
8775; CHECK:       ## %bb.0:
8776; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
8777; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8778  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 10)
8779  ret <16 x float> %res
8780}
8781
8782define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8783; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
8784; CHECK:       ## %bb.0:
8785; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
8786; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8787  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 11)
8788  ret <16 x float> %res
8789}
8790
8791define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8792; CHECK-LABEL: test_mm512_div_round_ps_current:
8793; CHECK:       ## %bb.0:
8794; CHECK-NEXT:    vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
8795; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
8796  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
8797  ret <16 x float> %res
8798}
8799declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
8800
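; The compress-store tests below come in pairs: a masked variant that moves
; the incoming mask into %k1 with kmovw, and an unmasked variant that
; materializes an all-ones mask via kxnorw %k0, %k0, %k1 (k0 XNOR k0 sets
; every bit). Each test returns void, so vzeroupper is emitted before ret.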
8801define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
8802; X86-LABEL: test_mask_compress_store_pd_512:
8803; X86:       ## %bb.0:
8804; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8805; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8806; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
8807; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
8808; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8809; X86-NEXT:    retl ## encoding: [0xc3]
8810;
8811; X64-LABEL: test_mask_compress_store_pd_512:
8812; X64:       ## %bb.0:
8813; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8814; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
8815; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8816; X64-NEXT:    retq ## encoding: [0xc3]
8817  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
8818  ret void
8819}
8820
8821declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
8822
8823define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
8824; X86-LABEL: test_compress_store_pd_512:
8825; X86:       ## %bb.0:
8826; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8827; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8828; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
8829; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8830; X86-NEXT:    retl ## encoding: [0xc3]
8831;
8832; X64-LABEL: test_compress_store_pd_512:
8833; X64:       ## %bb.0:
8834; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8835; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
8836; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8837; X64-NEXT:    retq ## encoding: [0xc3]
8838  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
8839  ret void
8840}
8841
8842define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
8843; X86-LABEL: test_mask_compress_store_ps_512:
8844; X86:       ## %bb.0:
8845; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8846; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8847; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
8848; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8849; X86-NEXT:    retl ## encoding: [0xc3]
8850;
8851; X64-LABEL: test_mask_compress_store_ps_512:
8852; X64:       ## %bb.0:
8853; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8854; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
8855; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8856; X64-NEXT:    retq ## encoding: [0xc3]
8857  call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
8858  ret void
8859}
8860
8861declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
8862
8863define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
8864; X86-LABEL: test_compress_store_ps_512:
8865; X86:       ## %bb.0:
8866; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8867; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8868; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
8869; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8870; X86-NEXT:    retl ## encoding: [0xc3]
8871;
8872; X64-LABEL: test_compress_store_ps_512:
8873; X64:       ## %bb.0:
8874; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8875; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
8876; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8877; X64-NEXT:    retq ## encoding: [0xc3]
8878  call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
8879  ret void
8880}
8881
8882define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
8883; X86-LABEL: test_mask_compress_store_q_512:
8884; X86:       ## %bb.0:
8885; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8886; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8887; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
8888; X86-NEXT:    vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
8889; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8890; X86-NEXT:    retl ## encoding: [0xc3]
8891;
8892; X64-LABEL: test_mask_compress_store_q_512:
8893; X64:       ## %bb.0:
8894; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8895; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
8896; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8897; X64-NEXT:    retq ## encoding: [0xc3]
8898  call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
8899  ret void
8900}
8901
8902declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
8903
8904define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
8905; X86-LABEL: test_compress_store_q_512:
8906; X86:       ## %bb.0:
8907; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8908; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8909; X86-NEXT:    vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
8910; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8911; X86-NEXT:    retl ## encoding: [0xc3]
8912;
8913; X64-LABEL: test_compress_store_q_512:
8914; X64:       ## %bb.0:
8915; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8916; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
8917; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8918; X64-NEXT:    retq ## encoding: [0xc3]
8919  call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
8920  ret void
8921}
8922
8923define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
8924; X86-LABEL: test_mask_compress_store_d_512:
8925; X86:       ## %bb.0:
8926; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8927; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8928; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
8929; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8930; X86-NEXT:    retl ## encoding: [0xc3]
8931;
8932; X64-LABEL: test_mask_compress_store_d_512:
8933; X64:       ## %bb.0:
8934; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8935; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
8936; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8937; X64-NEXT:    retq ## encoding: [0xc3]
8938  call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
8939  ret void
8940}
8941
8942declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
8943
8944define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
8945; X86-LABEL: test_compress_store_d_512:
8946; X86:       ## %bb.0:
8947; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8948; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8949; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
8950; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8951; X86-NEXT:    retl ## encoding: [0xc3]
8952;
8953; X64-LABEL: test_compress_store_d_512:
8954; X64:       ## %bb.0:
8955; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
8956; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
8957; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
8958; X64-NEXT:    retq ## encoding: [0xc3]
8959  call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
8960  ret void
8961}
8962
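; The expand-load tests cover both masking semantics: the merge form ({%k1})
; keeps the %data pass-through in lanes where the mask is clear, and the
; zeroing form ({%k1} {z}) is obtained by passing zeroinitializer as the
; pass-through operand.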
8963define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
8964; X86-LABEL: test_mask_expand_load_pd_512:
8965; X86:       ## %bb.0:
8966; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8967; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8968; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
8969; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
8970; X86-NEXT:    retl ## encoding: [0xc3]
8971;
8972; X64-LABEL: test_mask_expand_load_pd_512:
8973; X64:       ## %bb.0:
8974; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8975; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
8976; X64-NEXT:    retq ## encoding: [0xc3]
8977  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
8978  ret <8 x double> %res
8979}
8980
8981define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
8982; X86-LABEL: test_maskz_expand_load_pd_512:
8983; X86:       ## %bb.0:
8984; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
8985; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8986; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
8987; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00]
8988; X86-NEXT:    retl ## encoding: [0xc3]
8989;
8990; X64-LABEL: test_maskz_expand_load_pd_512:
8991; X64:       ## %bb.0:
8992; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8993; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07]
8994; X64-NEXT:    retq ## encoding: [0xc3]
8995  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
8996  ret <8 x double> %res
8997}
8998
8999declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
9000
9001define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
9002; X86-LABEL: test_expand_load_pd_512:
9003; X86:       ## %bb.0:
9004; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9005; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9006; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
9007; X86-NEXT:    retl ## encoding: [0xc3]
9008;
9009; X64-LABEL: test_expand_load_pd_512:
9010; X64:       ## %bb.0:
9011; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9012; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
9013; X64-NEXT:    retq ## encoding: [0xc3]
9014  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
9015  ret <8 x double> %res
9016}
9017
9018; Make sure we don't crash when the mask is zero; the masked expand-load folds away and the pass-through %data is returned unchanged.
9019define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
9020; CHECK-LABEL: test_zero_mask_expand_load_pd_512:
9021; CHECK:       ## %bb.0:
9022; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9023  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
9024  ret <8 x double> %res
9025}
9026
9027define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
9028; X86-LABEL: test_mask_expand_load_ps_512:
9029; X86:       ## %bb.0:
9030; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9031; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9032; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
9033; X86-NEXT:    retl ## encoding: [0xc3]
9034;
9035; X64-LABEL: test_mask_expand_load_ps_512:
9036; X64:       ## %bb.0:
9037; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9038; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
9039; X64-NEXT:    retq ## encoding: [0xc3]
9040  %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
9041  ret <16 x float> %res
9042}
9043
9044define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
9045; X86-LABEL: test_maskz_expand_load_ps_512:
9046; X86:       ## %bb.0:
9047; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9048; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9049; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
9050; X86-NEXT:    retl ## encoding: [0xc3]
9051;
9052; X64-LABEL: test_maskz_expand_load_ps_512:
9053; X64:       ## %bb.0:
9054; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9055; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
9056; X64-NEXT:    retq ## encoding: [0xc3]
9057  %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
9058  ret <16 x float> %res
9059}
9060
9061declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
9062
9063define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
9064; X86-LABEL: test_expand_load_ps_512:
9065; X86:       ## %bb.0:
9066; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9067; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9068; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
9069; X86-NEXT:    retl ## encoding: [0xc3]
9070;
9071; X64-LABEL: test_expand_load_ps_512:
9072; X64:       ## %bb.0:
9073; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9074; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
9075; X64-NEXT:    retq ## encoding: [0xc3]
9076  %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
9077  ret <16 x float> %res
9078}
9079
9080define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
9081; X86-LABEL: test_mask_expand_load_q_512:
9082; X86:       ## %bb.0:
9083; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9084; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9085; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
9086; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
9087; X86-NEXT:    retl ## encoding: [0xc3]
9088;
9089; X64-LABEL: test_mask_expand_load_q_512:
9090; X64:       ## %bb.0:
9091; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9092; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
9093; X64-NEXT:    retq ## encoding: [0xc3]
9094  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
9095  ret <8 x i64> %res
9096}
9097
9098define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
9099; X86-LABEL: test_maskz_expand_load_q_512:
9100; X86:       ## %bb.0:
9101; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9102; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9103; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
9104; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
9105; X86-NEXT:    retl ## encoding: [0xc3]
9106;
9107; X64-LABEL: test_maskz_expand_load_q_512:
9108; X64:       ## %bb.0:
9109; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9110; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
9111; X64-NEXT:    retq ## encoding: [0xc3]
9112  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
9113  ret <8 x i64> %res
9114}
9115
9116declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
9117
9118define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
9119; X86-LABEL: test_expand_load_q_512:
9120; X86:       ## %bb.0:
9121; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9122; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9123; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
9124; X86-NEXT:    retl ## encoding: [0xc3]
9125;
9126; X64-LABEL: test_expand_load_q_512:
9127; X64:       ## %bb.0:
9128; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9129; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
9130; X64-NEXT:    retq ## encoding: [0xc3]
9131  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
9132  ret <8 x i64> %res
9133}
9134
9135define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
9136; X86-LABEL: test_mask_expand_load_d_512:
9137; X86:       ## %bb.0:
9138; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9139; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9140; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
9141; X86-NEXT:    retl ## encoding: [0xc3]
9142;
9143; X64-LABEL: test_mask_expand_load_d_512:
9144; X64:       ## %bb.0:
9145; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9146; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
9147; X64-NEXT:    retq ## encoding: [0xc3]
9148  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
9149  ret <16 x i32> %res
9150}
9151
9152define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
9153; X86-LABEL: test_maskz_expand_load_d_512:
9154; X86:       ## %bb.0:
9155; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9156; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9157; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
9158; X86-NEXT:    retl ## encoding: [0xc3]
9159;
9160; X64-LABEL: test_maskz_expand_load_d_512:
9161; X64:       ## %bb.0:
9162; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9163; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
9164; X64-NEXT:    retq ## encoding: [0xc3]
9165  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
9166  ret <16 x i32> %res
9167}
9168
9169declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
9170
9171define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
9172; X86-LABEL: test_expand_load_d_512:
9173; X86:       ## %bb.0:
9174; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9175; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9176; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
9177; X86-NEXT:    retl ## encoding: [0xc3]
9178;
9179; X64-LABEL: test_expand_load_d_512:
9180; X64:       ## %bb.0:
9181; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
9182; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
9183; X64-NEXT:    retq ## encoding: [0xc3]
9184  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
9185  ret <16 x i32> %res
9186}
9187
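; In the min/max tests below (as in the div tests above), the trailing i32
; argument selects the embedded rounding mode printed on the instruction:
; 4 is the current direction (no suffix), 8 is {rn-sae}, 9 {rd-sae},
; 10 {ru-sae}, and 11 {rz-sae}. min/max never round, so an argument of 8
; prints as plain {sae} here.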
9188define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9189; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
9190; X86:       ## %bb.0:
9191; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9192; X86-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
9193; X86-NEXT:    retl ## encoding: [0xc3]
9194;
9195; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
9196; X64:       ## %bb.0:
9197; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9198; X64-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
9199; X64-NEXT:    retq ## encoding: [0xc3]
9200  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
9201  ret <16 x float> %res
9202}
9203
9204define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9205; X86-LABEL: test_mm512_maskz_min_round_ps_current:
9206; X86:       ## %bb.0:
9207; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9208; X86-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
9209; X86-NEXT:    retl ## encoding: [0xc3]
9210;
9211; X64-LABEL: test_mm512_maskz_min_round_ps_current:
9212; X64:       ## %bb.0:
9213; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9214; X64-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
9215; X64-NEXT:    retq ## encoding: [0xc3]
9216  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
9217  ret <16 x float> %res
9218}
9219
9220define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
9221; X86-LABEL: test_mm512_mask_min_round_ps_sae:
9222; X86:       ## %bb.0:
9223; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9224; X86-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
9225; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9226; X86-NEXT:    retl ## encoding: [0xc3]
9227;
9228; X64-LABEL: test_mm512_mask_min_round_ps_sae:
9229; X64:       ## %bb.0:
9230; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9231; X64-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
9232; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9233; X64-NEXT:    retq ## encoding: [0xc3]
9234  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
9235  ret <16 x float> %res
9236}
9237
9238define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
9239; X86-LABEL: test_mm512_mask_min_round_ps_current:
9240; X86:       ## %bb.0:
9241; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9242; X86-NEXT:    vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
9243; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9244; X86-NEXT:    retl ## encoding: [0xc3]
9245;
9246; X64-LABEL: test_mm512_mask_min_round_ps_current:
9247; X64:       ## %bb.0:
9248; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9249; X64-NEXT:    vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
9250; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9251; X64-NEXT:    retq ## encoding: [0xc3]
9252  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
9253  ret <16 x float> %res
9254}
9255
9256define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9257; CHECK-LABEL: test_mm512_min_round_ps_sae:
9258; CHECK:       ## %bb.0:
9259; CHECK-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
9260; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9261  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
9262  ret <16 x float> %res
9263}
9264
9265define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9266; CHECK-LABEL: test_mm512_min_round_ps_current:
9267; CHECK:       ## %bb.0:
9268; CHECK-NEXT:    vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
9269; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9270  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
9271  ret <16 x float> %res
9272}
9273declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
9274
9275define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9276; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
9277; X86:       ## %bb.0:
9278; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9279; X86-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
9280; X86-NEXT:    retl ## encoding: [0xc3]
9281;
9282; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
9283; X64:       ## %bb.0:
9284; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9285; X64-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
9286; X64-NEXT:    retq ## encoding: [0xc3]
9287  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
9288  ret <16 x float> %res
9289}
9290
9291define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9292; X86-LABEL: test_mm512_maskz_max_round_ps_current:
9293; X86:       ## %bb.0:
9294; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9295; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
9296; X86-NEXT:    retl ## encoding: [0xc3]
9297;
9298; X64-LABEL: test_mm512_maskz_max_round_ps_current:
9299; X64:       ## %bb.0:
9300; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9301; X64-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
9302; X64-NEXT:    retq ## encoding: [0xc3]
9303  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
9304  ret <16 x float> %res
9305}
9306
9307define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
9308; X86-LABEL: test_mm512_mask_max_round_ps_sae:
9309; X86:       ## %bb.0:
9310; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9311; X86-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
9312; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9313; X86-NEXT:    retl ## encoding: [0xc3]
9314;
9315; X64-LABEL: test_mm512_mask_max_round_ps_sae:
9316; X64:       ## %bb.0:
9317; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9318; X64-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
9319; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9320; X64-NEXT:    retq ## encoding: [0xc3]
9321  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
9322  ret <16 x float> %res
9323}
9324
9325define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
9326; X86-LABEL: test_mm512_mask_max_round_ps_current:
9327; X86:       ## %bb.0:
9328; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9329; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
9330; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9331; X86-NEXT:    retl ## encoding: [0xc3]
9332;
9333; X64-LABEL: test_mm512_mask_max_round_ps_current:
9334; X64:       ## %bb.0:
9335; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9336; X64-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
9337; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
9338; X64-NEXT:    retq ## encoding: [0xc3]
9339  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
9340  ret <16 x float> %res
9341}
9342
9343define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9344; CHECK-LABEL: test_mm512_max_round_ps_sae:
9345; CHECK:       ## %bb.0:
9346; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
9347; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9348  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
9349  ret <16 x float> %res
9350}
9351
9352define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
9353; CHECK-LABEL: test_mm512_max_round_ps_current:
9354; CHECK:       ## %bb.0:
9355; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
9356; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9357  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
9358  ret <16 x float> %res
9359}
9360declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
9361
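; The sqrt tests walk the full masking matrix (unmasked, merge-masked into
; %passthru, zero-masked) for both the current rounding direction (i32 4)
; and an explicit {rz-sae} override (i32 11).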
9362define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
9363; CHECK-LABEL: test_sqrt_pd_512:
9364; CHECK:       ## %bb.0:
9365; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x51,0xc0]
9366; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9367  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 4)
9368  ret <8 x double> %res
9369}
9370define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
9371; X86-LABEL: test_mask_sqrt_pd_512:
9372; X86:       ## %bb.0:
9373; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9374; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9375; X86-NEXT:    vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
9376; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
9377; X86-NEXT:    retl ## encoding: [0xc3]
9378;
9379; X64-LABEL: test_mask_sqrt_pd_512:
9380; X64:       ## %bb.0:
9381; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9382; X64-NEXT:    vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
9383; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
9384; X64-NEXT:    retq ## encoding: [0xc3]
9385  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 4)
9386  ret <8 x double> %res
9387}
9388define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
9389; X86-LABEL: test_maskz_sqrt_pd_512:
9390; X86:       ## %bb.0:
9391; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9392; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9393; X86-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
9394; X86-NEXT:    retl ## encoding: [0xc3]
9395;
9396; X64-LABEL: test_maskz_sqrt_pd_512:
9397; X64:       ## %bb.0:
9398; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9399; X64-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
9400; X64-NEXT:    retq ## encoding: [0xc3]
9401  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
9402  ret <8 x double> %res
9403}
9404define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
9405; CHECK-LABEL: test_sqrt_round_pd_512:
9406; CHECK:       ## %bb.0:
9407; CHECK-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x78,0x51,0xc0]
9408; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9409  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 11)
9410  ret <8 x double> %res
9411}
9412define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
9413; X86-LABEL: test_mask_sqrt_round_pd_512:
9414; X86:       ## %bb.0:
9415; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9416; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9417; X86-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
9418; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
9419; X86-NEXT:    retl ## encoding: [0xc3]
9420;
9421; X64-LABEL: test_mask_sqrt_round_pd_512:
9422; X64:       ## %bb.0:
9423; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9424; X64-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
9425; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
9426; X64-NEXT:    retq ## encoding: [0xc3]
9427  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 11)
9428  ret <8 x double> %res
9429}
9430define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
9431; X86-LABEL: test_maskz_sqrt_round_pd_512:
9432; X86:       ## %bb.0:
9433; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9434; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9435; X86-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
9436; X86-NEXT:    retl ## encoding: [0xc3]
9437;
9438; X64-LABEL: test_maskz_sqrt_round_pd_512:
9439; X64:       ## %bb.0:
9440; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9441; X64-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
9442; X64-NEXT:    retq ## encoding: [0xc3]
9443  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 11)
9444  ret <8 x double> %res
9445}
9446declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
9447
9448define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
9449; CHECK-LABEL: test_sqrt_ps_512:
9450; CHECK:       ## %bb.0:
9451; CHECK-NEXT:    vsqrtps %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x51,0xc0]
9452; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9453  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4)
9454  ret <16 x float> %res
9455}
9456define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
9457; X86-LABEL: test_mask_sqrt_ps_512:
9458; X86:       ## %bb.0:
9459; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9460; X86-NEXT:    vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
9461; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
9462; X86-NEXT:    retl ## encoding: [0xc3]
9463;
9464; X64-LABEL: test_mask_sqrt_ps_512:
9465; X64:       ## %bb.0:
9466; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9467; X64-NEXT:    vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
9468; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
9469; X64-NEXT:    retq ## encoding: [0xc3]
9470  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4)
9471  ret <16 x float> %res
9472}
9473define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
9474; X86-LABEL: test_maskz_sqrt_ps_512:
9475; X86:       ## %bb.0:
9476; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9477; X86-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
9478; X86-NEXT:    retl ## encoding: [0xc3]
9479;
9480; X64-LABEL: test_maskz_sqrt_ps_512:
9481; X64:       ## %bb.0:
9482; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9483; X64-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
9484; X64-NEXT:    retq ## encoding: [0xc3]
9485  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
9486  ret <16 x float> %res
9487}
9488define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
9489; CHECK-LABEL: test_sqrt_round_ps_512:
9490; CHECK:       ## %bb.0:
9491; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x51,0xc0]
9492; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9493  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11)
9494  ret <16 x float> %res
9495}
9496define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
9497; X86-LABEL: test_mask_sqrt_round_ps_512:
9498; X86:       ## %bb.0:
9499; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9500; X86-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
9501; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
9502; X86-NEXT:    retl ## encoding: [0xc3]
9503;
9504; X64-LABEL: test_mask_sqrt_round_ps_512:
9505; X64:       ## %bb.0:
9506; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9507; X64-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
9508; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
9509; X64-NEXT:    retq ## encoding: [0xc3]
9510  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11)
9511  ret <16 x float> %res
9512}
9513define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
9514; X86-LABEL: test_maskz_sqrt_round_ps_512:
9515; X86:       ## %bb.0:
9516; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9517; X86-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
9518; X86-NEXT:    retl ## encoding: [0xc3]
9519;
9520; X64-LABEL: test_maskz_sqrt_round_ps_512:
9521; X64:       ## %bb.0:
9522; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9523; X64-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
9524; X64-NEXT:    retq ## encoding: [0xc3]
9525  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11)
9526  ret <16 x float> %res
9527}
9528declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
9529
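; Variable-rotate tests: vprolvd/vprolvq and vprorvd/vprorvq rotate each
; element left or right by the per-element counts in the second source
; vector, again in unmasked (all-ones mask), merge-masked, and zero-masked
; forms.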
9530declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
9531
9532define <16 x i32> @test_int_x86_avx512_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
9533; CHECK-LABEL: test_int_x86_avx512_prolv_d_512_old:
9534; CHECK:       ## %bb.0:
9535; CHECK-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xc1]
9536; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9537  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
9538  ret <16 x i32> %res
9539}
9540
9541define <16 x i32> @test_int_x86_avx512_mask_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
9542; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512_old:
9543; X86:       ## %bb.0:
9544; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9545; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
9546; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9547; X86-NEXT:    retl ## encoding: [0xc3]
9548;
9549; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512_old:
9550; X64:       ## %bb.0:
9551; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9552; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
9553; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9554; X64-NEXT:    retq ## encoding: [0xc3]
9555  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
9556  ret <16 x i32> %res
9557}
9558
9559define <16 x i32> @test_int_x86_avx512_maskz_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
9560; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_512_old:
9561; X86:       ## %bb.0:
9562; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9563; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
9564; X86-NEXT:    retl ## encoding: [0xc3]
9565;
9566; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_512_old:
9567; X64:       ## %bb.0:
9568; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9569; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
9570; X64-NEXT:    retq ## encoding: [0xc3]
9571  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
9572  ret <16 x i32> %res
9573}
9574
9575declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
9576
9577define <8 x i64> @test_int_x86_avx512_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
9578; CHECK-LABEL: test_int_x86_avx512_prolv_q_512_old:
9579; CHECK:       ## %bb.0:
9580; CHECK-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xc1]
9581; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9582  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
9583  ret <8 x i64> %res
9584}
9585
9586define <8 x i64> @test_int_x86_avx512_mask_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
9587; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512_old:
9588; X86:       ## %bb.0:
9589; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9590; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9591; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
9592; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9593; X86-NEXT:    retl ## encoding: [0xc3]
9594;
9595; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512_old:
9596; X64:       ## %bb.0:
9597; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9598; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
9599; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9600; X64-NEXT:    retq ## encoding: [0xc3]
9601  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
9602  ret <8 x i64> %res
9603}
9604
9605define <8 x i64> @test_int_x86_avx512_maskz_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
9606; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_512_old:
9607; X86:       ## %bb.0:
9608; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9609; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9610; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
9611; X86-NEXT:    retl ## encoding: [0xc3]
9612;
9613; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_512_old:
9614; X64:       ## %bb.0:
9615; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9616; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
9617; X64-NEXT:    retq ## encoding: [0xc3]
9618  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
9619  ret <8 x i64> %res
9620}
9621
9622declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
9623
9624define <16 x i32> @test_int_x86_avx512_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
9625; CHECK-LABEL: test_int_x86_avx512_prorv_d_512_old:
9626; CHECK:       ## %bb.0:
9627; CHECK-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xc1]
9628; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9629  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
9630  ret <16 x i32> %res
9631}
9632
9633define <16 x i32> @test_int_x86_avx512_mask_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
9634; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512_old:
9635; X86:       ## %bb.0:
9636; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9637; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
9638; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9639; X86-NEXT:    retl ## encoding: [0xc3]
9640;
9641; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512_old:
9642; X64:       ## %bb.0:
9643; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9644; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
9645; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9646; X64-NEXT:    retq ## encoding: [0xc3]
9647  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
9648  ret <16 x i32> %res
9649}
9650
9651define <16 x i32> @test_int_x86_avx512_maskz_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
9652; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_512_old:
9653; X86:       ## %bb.0:
9654; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9655; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
9656; X86-NEXT:    retl ## encoding: [0xc3]
9657;
9658; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_512_old:
9659; X64:       ## %bb.0:
9660; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9661; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
9662; X64-NEXT:    retq ## encoding: [0xc3]
9663  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
9664  ret <16 x i32> %res
9665}
9666
9667declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
9668
9669define <8 x i64> @test_int_x86_avx512_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
9670; CHECK-LABEL: test_int_x86_avx512_prorv_q_512_old:
9671; CHECK:       ## %bb.0:
9672; CHECK-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xc1]
9673; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
9674  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
9675  ret <8 x i64> %res
9676}
9677
9678define <8 x i64> @test_int_x86_avx512_mask_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
9679; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512_old:
9680; X86:       ## %bb.0:
9681; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9682; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9683; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
9684; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9685; X86-NEXT:    retl ## encoding: [0xc3]
9686;
9687; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512_old:
9688; X64:       ## %bb.0:
9689; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9690; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
9691; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
9692; X64-NEXT:    retq ## encoding: [0xc3]
9693  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
9694  ret <8 x i64> %res
9695}
9696
9697define <8 x i64> @test_int_x86_avx512_maskz_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
9698; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_512_old:
9699; X86:       ## %bb.0:
9700; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9701; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9702; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
9703; X86-NEXT:    retl ## encoding: [0xc3]
9704;
9705; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_512_old:
9706; X64:       ## %bb.0:
9707; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9708; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
9709; X64-NEXT:    retq ## encoding: [0xc3]
9710  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
9711  ret <8 x i64> %res
9712}
9713
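; Immediate-rotate tests: each function issues three rotates with different
; immediates ($3 merge-masked, $4 zero-masked, $5 unmasked) and sums the
; results with vpadd, so a single return value checks all three mask forms.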
9714declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)
9715
9716define <16 x i32> @test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
9717; X86-LABEL: test_int_x86_avx512_mask_prol_d_512:
9718; X86:       ## %bb.0:
9719; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9720; X86-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
9721; X86-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
9722; X86-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
9723; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
9724; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
9725; X86-NEXT:    retl ## encoding: [0xc3]
9726;
9727; X64-LABEL: test_int_x86_avx512_mask_prol_d_512:
9728; X64:       ## %bb.0:
9729; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9730; X64-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
9731; X64-NEXT:    vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
9732; X64-NEXT:    vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
9733; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
9734; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
9735; X64-NEXT:    retq ## encoding: [0xc3]
9736  %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
9737  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
9738  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
9739  %res3 = add <16 x i32> %res, %res1
9740  %res4 = add <16 x i32> %res3, %res2
9741  ret <16 x i32> %res4
9742}
9743
9744declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)
9745
9746define <8 x i64> @test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
9747; X86-LABEL: test_int_x86_avx512_mask_prol_q_512:
9748; X86:       ## %bb.0:
9749; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
9750; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9751; X86-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
9752; X86-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
9753; X86-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
9754; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
9755; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
9756; X86-NEXT:    retl ## encoding: [0xc3]
9757;
9758; X64-LABEL: test_int_x86_avx512_mask_prol_q_512:
9759; X64:       ## %bb.0:
9760; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9761; X64-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
9762; X64-NEXT:    vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
9763; X64-NEXT:    vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
9764; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
9765; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
9766; X64-NEXT:    retq ## encoding: [0xc3]
9767  %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
9768  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
9769  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
9770  %res3 = add <8 x i64> %res, %res1
9771  %res4 = add <8 x i64> %res3, %res2
9772  ret <8 x i64> %res4
9773}
9774
9775declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)
9776
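; The pror tests below are the right-rotate counterparts of the prol tests
; above: a lane computes (x >> imm) | (x << (width - imm)). In C these would
; correspond to _mm512_ror_epi32/_mm512_ror_epi64 and their mask/maskz forms
; (an assumption based on the usual intrinsic naming, not checked here).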
define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

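; The scalar FMA tests from here on pass an explicit rounding-mode immediate
; as the last intrinsic operand. A sketch of how those immediates decode,
; assuming the usual _MM_FROUND_* encoding from <immintrin.h>:
;
;   4  == _MM_FROUND_CUR_DIRECTION                       (no static rounding)
;   8  == _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC  ({rn-sae})
;   10 == _MM_FROUND_TO_POS_INF     | _MM_FROUND_NO_EXC  ({ru-sae})
;   11 == _MM_FROUND_TO_ZERO        | _MM_FROUND_NO_EXC  ({rz-sae})
;
; which is why the i32 11 and i32 10 calls show up with {rz-sae} and {ru-sae}
; operands in the assertions, while i32 4 uses no rounding suffix.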
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
; X86-NEXT:    ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X86-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X86-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT:    vfmadd213sd {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa9,0xc2]
; X86-NEXT:    vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
; X64-NEXT:    ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X64-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X64-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT:    vfmadd213sd {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa9,0xc2]
; X64-NEXT:    vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
; X86-NEXT:    ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X86-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT:    vfmadd213ss {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa9,0xc2]
; X86-NEXT:    vaddps %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
; X64-NEXT:    ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X64-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT:    vfmadd213ss {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa9,0xc2]
; X64-NEXT:    vaddps %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
; X86-NEXT:    ## xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
; X86-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
; X64-NEXT:    ## xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
; X64-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

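; Note for the test below: only %res is returned, so the second call (with
; rounding mode 11) and the fadd are dead and the assertions contain a single
; vfmadd213ss. In C the live part is roughly (hypothetical wrapper, assuming
; AVX512F intrinsics):
;
;   __m128 maskz_fmadd(__mmask8 k, __m128 a, __m128 b, __m128 c) {
;     return _mm_maskz_fmadd_ss(k, a, b, c);  // lane 0 zeroed if k[0] == 0
;   }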
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X64-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

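; In the mask3 variants below the addend (%x2) is also the destination, so
; the compiler copies it aside and selects the 231 instruction form: lanes
; that fail the predicate keep the addend's value. A hedged C equivalent of
; one merge-masked call (assuming AVX512F intrinsics):
;
;   __m128d mask3_fmadd(__m128d a, __m128d b, __m128d c, __mmask8 k) {
;     return _mm_mask3_fmadd_sd(a, b, c, k);  // lane 0: k[0] ? a*b+c : c
;   }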
define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT:    vfmadd231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xb9,0xd1]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT:    vfmadd231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xb9,0xd1]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT:    vfmadd231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xb9,0xd1]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT:    vfmadd231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xb9,0xd1]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

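; The memfold tests below build <4 x float>/<2 x double> vectors from scalar
; loads and check that codegen keeps the loads scalar (no vector-widening
; loads) while the masked vmovss/vmovsd merges the result back. A rough C
; shape of the pattern being tested (hypothetical helper, assuming AVX512F;
; the authoritative input is the IR below):
;
;   void fmadd_ss_mask_memfold(float *a, float *b, __mmask8 c) {
;     __m128 av = _mm_load_ss(a), bv = _mm_load_ss(b);
;     __m128 r  = _mm_mask_fmadd_ss(av, c, bv, av);  // av*bv + av, merge av
;     _mm_store_ss(a, r);
;   }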
define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_mask_memfold:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
; X86-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X86-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_mask_memfold:
; X64:       ## %bb.0:
; X64-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
; X64-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X64-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_maskz_memfold:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_maskz_memfold:
; X64:       ## %bb.0:
; X64-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_mask_memfold:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT:    ## xmm0 = mem[0],zero
; X86-NEXT:    vmovsd (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x09]
; X86-NEXT:    ## xmm1 = mem[0],zero
; X86-NEXT:    vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X86-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_mask_memfold:
; X64:       ## %bb.0:
; X64-NEXT:    vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT:    ## xmm0 = mem[0],zero
; X64-NEXT:    vmovsd (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0e]
; X64-NEXT:    ## xmm1 = mem[0],zero
; X64-NEXT:    vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X64-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_maskz_memfold:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT:    ## xmm0 = mem[0],zero
; X86-NEXT:    vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_maskz_memfold:
; X64:       ## %bb.0:
; X64-NEXT:    vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT:    ## xmm0 = mem[0],zero
; X64-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

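; vfmsub/vfnmsub below are the sign-flipped relatives of vfmadd: fmsub
; computes (a * b) - c and fnmsub computes -(a * b) - c, as the inline
; semantics annotations in the assertions spell out. The masking and
; rounding structure is identical to the vfmadd tests above.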
define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT:    vfmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbb,0xd1]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT:    vfmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbb,0xd1]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT:    vfmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbb,0xd1]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT:    vfmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbb,0xd1]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT:    vfnmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbf,0xd1]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT:    vfnmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbf,0xd1]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
; X86-NEXT:    ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT:    vfnmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbf,0xd1]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
; X64-NEXT:    ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT:    vfnmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbf,0xd1]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

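; The _rm tests below load one scalar operand from memory and expect it to
; fold directly into the FMA's memory operand; which instruction form gets
; picked (231 vs 132) depends on which IR operand the load feeds, as the
; (xmm0 * mem) + xmm1 annotations show.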
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float* %ptr_b, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
; X86-NEXT:    ## xmm1 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x0f]
; X64-NEXT:    ## xmm1 {%k1} = (xmm0 * mem) + xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float* %ptr_b, i8 %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
; X86-NEXT:    ## xmm0 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x07]
; X64-NEXT:    ## xmm0 {%k1} = (xmm0 * mem) + xmm1
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float* %ptr_b, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; CHECK-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

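; pmov.qd truncates each 64-bit lane to 32 bits, narrowing zmm to ymm. A
; hedged C equivalent of the merge-masked form (assuming AVX512F intrinsics):
;
;   __m256i pmov_qd(__m256i src, __mmask8 k, __m512i x) {
;     return _mm512_mask_cvtepi64_epi32(src, k, x);  // per-lane truncate
;   }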
define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmov_qd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovqd %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 ## encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 ## encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

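; cvtdq2ps converts signed 32-bit lanes to float; the i32 4 call uses the
; current rounding mode and the i32 8 call requests {rn-sae}. A hedged C
; sketch of the unmasked rounding form (assuming AVX512F intrinsics):
;
;   __m512 cvt_rn(__m512i x) {
;     return _mm512_cvt_roundepi32_ps(
;         x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
;   }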
define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
; X86-NEXT:    vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
; X64-NEXT:    vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
; X86-NEXT:    vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
; X64-NEXT:    vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

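; compress packs the lanes selected by the mask into the low end of the
; destination; the remaining lanes come from the passthru (or are zeroed in
; the maskz form). A small worked example with mask 0b00001010 on
; <8 x double> d:
;
;   result = { d[1], d[3], passthru[2], ..., passthru[7] }
;
; In C the merge-masked form would be _mm512_mask_compress_pd(src, k, d)
; (assuming AVX512F). With mask i8 -1 every lane is selected, so compress is
; the identity, which is why the unmasked tests fold to a bare return.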
10514define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
10515; X86-LABEL: test_mask_compress_pd_512:
10516; X86:       ## %bb.0:
10517; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
10518; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
10519; X86-NEXT:    vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
10520; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10521; X86-NEXT:    retl ## encoding: [0xc3]
10522;
10523; X64-LABEL: test_mask_compress_pd_512:
10524; X64:       ## %bb.0:
10525; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10526; X64-NEXT:    vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
10527; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10528; X64-NEXT:    retq ## encoding: [0xc3]
10529  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
10530  ret <8 x double> %res
10531}
10532
10533define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
10534; X86-LABEL: test_maskz_compress_pd_512:
10535; X86:       ## %bb.0:
10536; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
10537; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
10538; X86-NEXT:    vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
10539; X86-NEXT:    retl ## encoding: [0xc3]
10540;
10541; X64-LABEL: test_maskz_compress_pd_512:
10542; X64:       ## %bb.0:
10543; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10544; X64-NEXT:    vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
10545; X64-NEXT:    retq ## encoding: [0xc3]
10546  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
10547  ret <8 x double> %res
10548}
10549
10550define <8 x double> @test_compress_pd_512(<8 x double> %data) {
10551; CHECK-LABEL: test_compress_pd_512:
10552; CHECK:       ## %bb.0:
10553; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
10554  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
10555  ret <8 x double> %res
10556}
10557
10558declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
10559
10560define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
10561; X86-LABEL: test_mask_compress_ps_512:
10562; X86:       ## %bb.0:
10563; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
10564; X86-NEXT:    vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
10565; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10566; X86-NEXT:    retl ## encoding: [0xc3]
10567;
10568; X64-LABEL: test_mask_compress_ps_512:
10569; X64:       ## %bb.0:
10570; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10571; X64-NEXT:    vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
10572; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10573; X64-NEXT:    retq ## encoding: [0xc3]
10574  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
10575  ret <16 x float> %res
10576}
10577
10578define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
10579; X86-LABEL: test_maskz_compress_ps_512:
10580; X86:       ## %bb.0:
10581; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
10582; X86-NEXT:    vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
10583; X86-NEXT:    retl ## encoding: [0xc3]
10584;
10585; X64-LABEL: test_maskz_compress_ps_512:
10586; X64:       ## %bb.0:
10587; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10588; X64-NEXT:    vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
10589; X64-NEXT:    retq ## encoding: [0xc3]
10590  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
10591  ret <16 x float> %res
10592}
10593
10594define <16 x float> @test_compress_ps_512(<16 x float> %data) {
10595; CHECK-LABEL: test_compress_ps_512:
10596; CHECK:       ## %bb.0:
10597; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
10598  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
10599  ret <16 x float> %res
10600}
10601
10602declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
10603
10604define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
10605; X86-LABEL: test_mask_compress_q_512:
10606; X86:       ## %bb.0:
10607; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
10608; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
10609; X86-NEXT:    vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
10610; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10611; X86-NEXT:    retl ## encoding: [0xc3]
10612;
10613; X64-LABEL: test_mask_compress_q_512:
10614; X64:       ## %bb.0:
10615; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10616; X64-NEXT:    vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
10617; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
10618; X64-NEXT:    retq ## encoding: [0xc3]
10619  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
10620  ret <8 x i64> %res
10621}
10622
10623define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
10624; X86-LABEL: test_maskz_compress_q_512:
10625; X86:       ## %bb.0:
10626; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
10627; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
10628; X86-NEXT:    vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
10629; X86-NEXT:    retl ## encoding: [0xc3]
10630;
10631; X64-LABEL: test_maskz_compress_q_512:
10632; X64:       ## %bb.0:
10633; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10634; X64-NEXT:    vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
10635; X64-NEXT:    retq ## encoding: [0xc3]
10636  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
10637  ret <8 x i64> %res
10638}
10639
10640define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
10641; CHECK-LABEL: test_compress_q_512:
10642; CHECK:       ## %bb.0:
10643; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
10644  %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
10645  ret <8 x i64> %res
10646}
10647
10648declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
10649
define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

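; Expand is the inverse permutation of compress: consecutive low elements of
; the source are scattered into the destination lanes selected by the mask.
; As with compress, an all-ones mask makes it the identity, so the unmasked
; form below folds to a plain return.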
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

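; Merge-masking expand of packed doubles, mirroring the compress tests:
; expand into the passthru register, then copy the result back to %zmm0.
; A rough C-level equivalent, assuming the usual <immintrin.h> name:
;   __m512d r = _mm512_mask_expand_pd(passthru, mask, data);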
define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

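; Single-precision variants; the i16 mask again fits kmovw directly. Roughly
; _mm512_mask_expand_ps / _mm512_maskz_expand_ps at the C level (intrinsic
; names assumed).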
define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)

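; Qword integer expand: the same no-op/merge/zero trio as above, using
; vpexpandq and the movzbl+kmovw sequence for the i8 mask on 32-bit targets.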
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

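; Dword integer expand completes the set: vpexpandd with a directly loaded
; i16 mask.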
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

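; Checks that two vector compares, one with {sae} (rounding argument i32 8,
; i.e. exceptions suppressed) and one under the current rounding mode (i32 4),
; combine with the xor and the operand-swapped select into a single kxnorw
; plus a zero-masked aligned load. Predicate i32 1 is _CMP_LT_OS, hence
; vcmpltps.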
define <16 x float> @test_cmp_512(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, float* %p) {
; X86-LABEL: test_cmp_512:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01]
; X86-NEXT:    vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01]
; X86-NEXT:    kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_cmp_512:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01]
; X64-NEXT:    vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01]
; X64-NEXT:    kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, i32 8)
  %1 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %d, i32 1, i32 4)
  %2 = bitcast float* %p to <16 x float>*
  %3 = load <16 x float>, <16 x float>* %2
  %4 = xor <16 x i1> %0, %1
  %5 = select <16 x i1> %4, <16 x float> zeroinitializer, <16 x float> %3
  ret <16 x float> %5
}

declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
